Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6

author: David Woodhouse <dwmw2@infradead.org> 2007-07-11 09:55:48 -0400
committer: David Woodhouse <dwmw2@infradead.org> 2007-07-11 09:55:48 -0400
commit: db1b39d8b860e3716620c225bc86e0ec41764e34 (patch)
tree: 8739074db733ef767400ea92cfbfed9352ddb92d /fs
parent: a6bc432e296dfa1f05d4b586ca5ca3085a2d42d7 (diff)
parent: 4eb6bf6bfb580afaf1e1a1d30cba17a078530cf4 (diff)
133 files changed, 4005 insertions, 2476 deletions
diff --git a/fs/adfs/file.c b/fs/adfs/file.c
index f544a2855923..36e381c6a99a 100644
--- a/fs/adfs/file.c
+++ b/fs/adfs/file.c
@@ -33,7 +33,7 @@ const struct file_operations adfs_file_operations = {
        .fsync          = file_fsync,
        .write          = do_sync_write,
        .aio_write      = generic_file_aio_write,
-        .sendfile       = generic_file_sendfile,
+        .splice_read    = generic_file_splice_read,
 };
 const struct inode_operations adfs_file_inode_operations = {
diff --git a/fs/affs/file.c b/fs/affs/file.c
index c8796906f584..c314a35f0918 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -35,7 +35,7 @@ const struct file_operations affs_file_operations = {
        .open           = affs_file_open,
        .release        = affs_file_release,
        .fsync          = file_fsync,
-        .sendfile       = generic_file_sendfile,
+        .splice_read    = generic_file_splice_read,
 };
 const struct inode_operations affs_file_inode_operations = {
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 9c0e721d9fc2..aede7eb66dd4 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -32,7 +32,7 @@ const struct file_operations afs_file_operations = {
        .aio_read       = generic_file_aio_read,
        .aio_write      = afs_file_write,
        .mmap           = generic_file_readonly_mmap,
-        .sendfile       = generic_file_sendfile,
+        .splice_read    = generic_file_splice_read,
        .fsync          = afs_fsync,
 };
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index 329ee473eede..521ff7caadbd 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -114,12 +114,6 @@ static int bad_file_lock(struct file *file, int cmd, struct file_lock *fl)
        return -EIO;
 }
-static ssize_t bad_file_sendfile(struct file *in_file, loff_t *ppos,
-                        size_t count, read_actor_t actor, void *target)
-{
-        return -EIO;
-}
 static ssize_t bad_file_sendpage(struct file *file, struct page *page,
                        int off, size_t len, loff_t *pos, int more)
 {
@@ -182,7 +176,6 @@ static const struct file_operations bad_file_ops =
        .aio_fsync      = bad_file_aio_fsync,
        .fasync         = bad_file_fasync,
        .lock           = bad_file_lock,
-        .sendfile       = bad_file_sendfile,
        .sendpage       = bad_file_sendpage,
        .get_unmapped_area = bad_file_get_unmapped_area,
        .check_flags    = bad_file_check_flags,
diff --git a/fs/bfs/file.c b/fs/bfs/file.c
index ef4d1fa04e65..24310e9ee05a 100644
--- a/fs/bfs/file.c
+++ b/fs/bfs/file.c
@@ -24,7 +24,7 @@ const struct file_operations bfs_file_operations = {
        .write          = do_sync_write,
        .aio_write      = generic_file_aio_write,
        .mmap           = generic_file_mmap,
-        .sendfile       = generic_file_sendfile,
+        .splice_read    = generic_file_splice_read,
 };
 static int bfs_move_block(unsigned long from, unsigned long to, struct super_block *sb)
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index fa8ea33ab0be..08e4414b8374 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1499,6 +1499,9 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file)
 #endif
        int thread_status_size = 0;
        elf_addr_t *auxv;
+#ifdef ELF_CORE_WRITE_EXTRA_NOTES
+        int extra_notes_size;
+#endif
        /*
         * We no longer stop all VM operations.
@@ -1628,7 +1631,8 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file)
                sz += thread_status_size;
 #ifdef ELF_CORE_WRITE_EXTRA_NOTES
-                sz += ELF_CORE_EXTRA_NOTES_SIZE;
+                extra_notes_size = ELF_CORE_EXTRA_NOTES_SIZE;
+                sz += extra_notes_size;
 #endif
                fill_elf_note_phdr(&phdr, sz, offset);
@@ -1674,6 +1678,7 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file)
 #ifdef ELF_CORE_WRITE_EXTRA_NOTES
        ELF_CORE_WRITE_EXTRA_NOTES;
+        foffset += extra_notes_size;
 #endif
        /* write out the thread status notes section */
diff --git a/fs/bio.c b/fs/bio.c
index 093345f00128..33e46340a766 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -1223,8 +1223,6 @@ EXPORT_SYMBOL(bio_hw_segments);
 EXPORT_SYMBOL(bio_add_page);
 EXPORT_SYMBOL(bio_add_pc_page);
 EXPORT_SYMBOL(bio_get_nr_vecs);
-EXPORT_SYMBOL(bio_map_user);
-EXPORT_SYMBOL(bio_unmap_user);
 EXPORT_SYMBOL(bio_map_kern);
 EXPORT_SYMBOL(bio_pair_release);
 EXPORT_SYMBOL(bio_split);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index ea1480a16f51..b3e9bfa748cf 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1346,7 +1346,6 @@ const struct file_operations def_blk_fops = {
 #ifdef CONFIG_COMPAT
        .compat_ioctl   = compat_blkdev_ioctl,
 #endif
-        .sendfile       = generic_file_sendfile,
        .splice_read    = generic_file_splice_read,
        .splice_write   = generic_file_splice_write,
 };
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 7c04752b76cb..8b0cbf4a4ad0 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -616,7 +616,7 @@ const struct file_operations cifs_file_ops = {
        .fsync = cifs_fsync,
        .flush = cifs_flush,
        .mmap  = cifs_file_mmap,
-        .sendfile = generic_file_sendfile,
+        .splice_read = generic_file_splice_read,
        .llseek = cifs_llseek,
 #ifdef CONFIG_CIFS_POSIX
        .ioctl  = cifs_ioctl,
@@ -637,7 +637,7 @@ const struct file_operations cifs_file_direct_ops = {
        .lock = cifs_lock,
        .fsync = cifs_fsync,
        .flush = cifs_flush,
-        .sendfile = generic_file_sendfile, /* BB removeme BB */
+        .splice_read = generic_file_splice_read,
 #ifdef CONFIG_CIFS_POSIX
        .ioctl  = cifs_ioctl,
 #endif /* CONFIG_CIFS_POSIX */
@@ -656,7 +656,7 @@ const struct file_operations cifs_file_nobrl_ops = {
        .fsync = cifs_fsync,
        .flush = cifs_flush,
        .mmap  = cifs_file_mmap,
-        .sendfile = generic_file_sendfile,
+        .splice_read = generic_file_splice_read,
        .llseek = cifs_llseek,
 #ifdef CONFIG_CIFS_POSIX
        .ioctl  = cifs_ioctl,
@@ -676,7 +676,7 @@ const struct file_operations cifs_file_direct_nobrl_ops = {
        .release = cifs_close,
        .fsync = cifs_fsync,
        .flush = cifs_flush,
-        .sendfile = generic_file_sendfile, /* BB removeme BB */
+        .splice_read = generic_file_splice_read,
 #ifdef CONFIG_CIFS_POSIX
        .ioctl  = cifs_ioctl,
 #endif /* CONFIG_CIFS_POSIX */
diff --git a/fs/coda/file.c b/fs/coda/file.c
index 5ef2b609ec7d..99dbe866816d 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -47,8 +47,9 @@ coda_file_read(struct file *coda_file, char __user *buf, size_t count, loff_t *p
 }
 static ssize_t
-coda_file_sendfile(struct file *coda_file, loff_t *ppos, size_t count,
+coda_file_splice_read(struct file *coda_file, loff_t *ppos,
-                   read_actor_t actor, void *target)
+                      struct pipe_inode_info *pipe, size_t count,
+                      unsigned int flags)
 {
        struct coda_file_info *cfi;
        struct file *host_file;
@@ -57,10 +58,10 @@ coda_file_sendfile(struct file *coda_file, loff_t *ppos, size_t count,
        BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
        host_file = cfi->cfi_container;
-        if (!host_file->f_op || !host_file->f_op->sendfile)
+        if (!host_file->f_op || !host_file->f_op->splice_read)
                return -EINVAL;
-        return host_file->f_op->sendfile(host_file, ppos, count, actor, target);
+        return host_file->f_op->splice_read(host_file, ppos, pipe, count,flags);
 }
 static ssize_t
@@ -295,6 +296,6 @@ const struct file_operations coda_file_operations = {
        .flush          = coda_flush,
        .release        = coda_release,
        .fsync          = coda_fsync,
-        .sendfile       = coda_file_sendfile,
+        .splice_read    = coda_file_splice_read,
 };
diff --git a/fs/dlm/Kconfig b/fs/dlm/Kconfig
index 69a94690e493..54bcc00ec8df 100644
--- a/fs/dlm/Kconfig
+++ b/fs/dlm/Kconfig
@@ -3,7 +3,7 @@ menu "Distributed Lock Manager"
 config DLM
        tristate "Distributed Lock Manager (DLM)"
-        depends on IPV6 || IPV6=n
+        depends on SYSFS && (IPV6 || IPV6=n)
        select CONFIGFS_FS
        select IP_SCTP
        help
diff --git a/fs/dlm/Makefile b/fs/dlm/Makefile
index 604cf7dc5f39..d248e60951ba 100644
--- a/fs/dlm/Makefile
+++ b/fs/dlm/Makefile
@@ -8,6 +8,7 @@ dlm-y :=			ast.o \
                                member.o \
                                memory.o \
                                midcomms.o \
+                                netlink.o \
                                lowcomms.o \
                                rcom.o \
                                recover.o \
diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index 822abdcd1434..5069b2cb5a1f 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -90,6 +90,7 @@ struct cluster {
        unsigned int cl_scan_secs;
        unsigned int cl_log_debug;
        unsigned int cl_protocol;
+        unsigned int cl_timewarn_cs;
 };
 enum {
@@ -103,6 +104,7 @@ enum {
        CLUSTER_ATTR_SCAN_SECS,
        CLUSTER_ATTR_LOG_DEBUG,
        CLUSTER_ATTR_PROTOCOL,
+        CLUSTER_ATTR_TIMEWARN_CS,
 };
 struct cluster_attribute {
@@ -162,6 +164,7 @@ CLUSTER_ATTR(toss_secs, 1);
 CLUSTER_ATTR(scan_secs, 1);
 CLUSTER_ATTR(log_debug, 0);
 CLUSTER_ATTR(protocol, 0);
+CLUSTER_ATTR(timewarn_cs, 1);
 static struct configfs_attribute *cluster_attrs[] = {
        [CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr,
@@ -174,6 +177,7 @@ static struct configfs_attribute *cluster_attrs[] = {
        [CLUSTER_ATTR_SCAN_SECS] = &cluster_attr_scan_secs.attr,
        [CLUSTER_ATTR_LOG_DEBUG] = &cluster_attr_log_debug.attr,
        [CLUSTER_ATTR_PROTOCOL] = &cluster_attr_protocol.attr,
+        [CLUSTER_ATTR_TIMEWARN_CS] = &cluster_attr_timewarn_cs.attr,
        NULL,
 };
@@ -429,6 +433,8 @@ static struct config_group *make_cluster(struct config_group *g,
        cl->cl_toss_secs = dlm_config.ci_toss_secs;
        cl->cl_scan_secs = dlm_config.ci_scan_secs;
        cl->cl_log_debug = dlm_config.ci_log_debug;
+        cl->cl_protocol = dlm_config.ci_protocol;
+        cl->cl_timewarn_cs = dlm_config.ci_timewarn_cs;
        space_list = &sps->ss_group;
        comm_list = &cms->cs_group;
@@ -748,9 +754,16 @@ static ssize_t node_weight_write(struct node *nd, const char *buf, size_t len)
 static struct space *get_space(char *name)
 {
+        struct config_item *i;
        if (!space_list)
                return NULL;
-        return to_space(config_group_find_obj(space_list, name));
+        down(&space_list->cg_subsys->su_sem);
+        i = config_group_find_obj(space_list, name);
+        up(&space_list->cg_subsys->su_sem);
+        return to_space(i);
 }
 static void put_space(struct space *sp)
@@ -776,20 +789,20 @@ static struct comm *get_comm(int nodeid, struct sockaddr_storage *addr)
                        if (cm->nodeid != nodeid)
                                continue;
                        found = 1;
+                        config_item_get(i);
                        break;
                } else {
                        if (!cm->addr_count ||
                            memcmp(cm->addr[0], addr, sizeof(*addr)))
                                continue;
                        found = 1;
+                        config_item_get(i);
                        break;
                }
        }
        up(&clusters_root.subsys.su_sem);
-        if (found)
+        if (!found)
-                config_item_get(i);
-        else
                cm = NULL;
        return cm;
 }
@@ -909,6 +922,7 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num)
 #define DEFAULT_SCAN_SECS          5
 #define DEFAULT_LOG_DEBUG          0
 #define DEFAULT_PROTOCOL           0
+#define DEFAULT_TIMEWARN_CS      500 /* 5 sec = 500 centiseconds */
 struct dlm_config_info dlm_config = {
        .ci_tcp_port = DEFAULT_TCP_PORT,
@@ -920,6 +934,7 @@ struct dlm_config_info dlm_config = {
        .ci_toss_secs = DEFAULT_TOSS_SECS,
        .ci_scan_secs = DEFAULT_SCAN_SECS,
        .ci_log_debug = DEFAULT_LOG_DEBUG,
-        .ci_protocol = DEFAULT_PROTOCOL
+        .ci_protocol = DEFAULT_PROTOCOL,
+        .ci_timewarn_cs = DEFAULT_TIMEWARN_CS
 };
diff --git a/fs/dlm/config.h b/fs/dlm/config.h
index 967cc3d72e5e..a3170fe22090 100644
--- a/fs/dlm/config.h
+++ b/fs/dlm/config.h
@@ -27,6 +27,7 @@ struct dlm_config_info {
        int ci_scan_secs;
        int ci_log_debug;
        int ci_protocol;
+        int ci_timewarn_cs;
 };
 extern struct dlm_config_info dlm_config;
diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c
index 61ba670b9e02..12c3bfd5e660 100644
--- a/fs/dlm/debug_fs.c
+++ b/fs/dlm/debug_fs.c
@@ -17,6 +17,7 @@
 #include <linux/debugfs.h>
 #include "dlm_internal.h"
+#include "lock.h"
 #define DLM_DEBUG_BUF_LEN 4096
 static char debug_buf[DLM_DEBUG_BUF_LEN];
@@ -26,6 +27,8 @@ static struct dentry *dlm_root;
 struct rsb_iter {
        int entry;
+        int locks;
+        int header;
        struct dlm_ls *ls;
        struct list_head *next;
        struct dlm_rsb *rsb;
@@ -57,8 +60,8 @@ static char *print_lockmode(int mode)
        }
 }
-static void print_lock(struct seq_file *s, struct dlm_lkb *lkb,
+static void print_resource_lock(struct seq_file *s, struct dlm_lkb *lkb,
-                       struct dlm_rsb *res)
+                                struct dlm_rsb *res)
 {
        seq_printf(s, "%08x %s", lkb->lkb_id, print_lockmode(lkb->lkb_grmode));
@@ -85,6 +88,8 @@ static int print_resource(struct dlm_rsb *res, struct seq_file *s)
        struct dlm_lkb *lkb;
        int i, lvblen = res->res_ls->ls_lvblen, recover_list, root_list;
+        lock_rsb(res);
        seq_printf(s, "\nResource %p Name (len=%d) \"", res, res->res_length);
        for (i = 0; i < res->res_length; i++) {
                if (isprint(res->res_name[i]))
@@ -129,15 +134,15 @@ static int print_resource(struct dlm_rsb *res, struct seq_file *s)
        /* Print the locks attached to this resource */
        seq_printf(s, "Granted Queue\n");
        list_for_each_entry(lkb, &res->res_grantqueue, lkb_statequeue)
-                print_lock(s, lkb, res);
+                print_resource_lock(s, lkb, res);
        seq_printf(s, "Conversion Queue\n");
        list_for_each_entry(lkb, &res->res_convertqueue, lkb_statequeue)
-                print_lock(s, lkb, res);
+                print_resource_lock(s, lkb, res);
        seq_printf(s, "Waiting Queue\n");
        list_for_each_entry(lkb, &res->res_waitqueue, lkb_statequeue)
-                print_lock(s, lkb, res);
+                print_resource_lock(s, lkb, res);
        if (list_empty(&res->res_lookup))
                goto out;
@@ -151,6 +156,61 @@ static int print_resource(struct dlm_rsb *res, struct seq_file *s)
                seq_printf(s, "\n");
        }
 out:
+        unlock_rsb(res);
+        return 0;
+}
+static void print_lock(struct seq_file *s, struct dlm_lkb *lkb, struct dlm_rsb *r)
+{
+        struct dlm_user_args *ua;
+        unsigned int waiting = 0;
+        uint64_t xid = 0;
+        if (lkb->lkb_flags & DLM_IFL_USER) {
+                ua = (struct dlm_user_args *) lkb->lkb_astparam;
+                if (ua)
+                        xid = ua->xid;
+        }
+        if (lkb->lkb_timestamp)
+                waiting = jiffies_to_msecs(jiffies - lkb->lkb_timestamp);
+        /* id nodeid remid pid xid exflags flags sts grmode rqmode time_ms
+           r_nodeid r_len r_name */
+        seq_printf(s, "%x %d %x %u %llu %x %x %d %d %d %u %u %d \"%s\"\n",
+                   lkb->lkb_id,
+                   lkb->lkb_nodeid,
+                   lkb->lkb_remid,
+                   lkb->lkb_ownpid,
+                   (unsigned long long)xid,
+                   lkb->lkb_exflags,
+                   lkb->lkb_flags,
+                   lkb->lkb_status,
+                   lkb->lkb_grmode,
+                   lkb->lkb_rqmode,
+                   waiting,
+                   r->res_nodeid,
+                   r->res_length,
+                   r->res_name);
+}
+static int print_locks(struct dlm_rsb *r, struct seq_file *s)
+{
+        struct dlm_lkb *lkb;
+        lock_rsb(r);
+        list_for_each_entry(lkb, &r->res_grantqueue, lkb_statequeue)
+                print_lock(s, lkb, r);
+        list_for_each_entry(lkb, &r->res_convertqueue, lkb_statequeue)
+                print_lock(s, lkb, r);
+        list_for_each_entry(lkb, &r->res_waitqueue, lkb_statequeue)
+                print_lock(s, lkb, r);
+        unlock_rsb(r);
        return 0;
 }
@@ -166,6 +226,9 @@ static int rsb_iter_next(struct rsb_iter *ri)
                        read_lock(&ls->ls_rsbtbl[i].lock);
                        if (!list_empty(&ls->ls_rsbtbl[i].list)) {
                                ri->next = ls->ls_rsbtbl[i].list.next;
+                                ri->rsb = list_entry(ri->next, struct dlm_rsb,
+                                                        res_hashchain);
+                                dlm_hold_rsb(ri->rsb);
                                read_unlock(&ls->ls_rsbtbl[i].lock);
                                break;
                        }
@@ -176,6 +239,7 @@ static int rsb_iter_next(struct rsb_iter *ri)
                if (ri->entry >= ls->ls_rsbtbl_size)
                        return 1;
        } else {
+                struct dlm_rsb *old = ri->rsb;
                i = ri->entry;
                read_lock(&ls->ls_rsbtbl[i].lock);
                ri->next = ri->next->next;
@@ -184,11 +248,14 @@ static int rsb_iter_next(struct rsb_iter *ri)
                        ri->next = NULL;
                        ri->entry++;
                        read_unlock(&ls->ls_rsbtbl[i].lock);
+                        dlm_put_rsb(old);
                        goto top;
                }
+                ri->rsb = list_entry(ri->next, struct dlm_rsb, res_hashchain);
+                dlm_hold_rsb(ri->rsb);
                read_unlock(&ls->ls_rsbtbl[i].lock);
+                dlm_put_rsb(old);
        }
-        ri->rsb = list_entry(ri->next, struct dlm_rsb, res_hashchain);
        return 0;
 }
@@ -202,7 +269,7 @@ static struct rsb_iter *rsb_iter_init(struct dlm_ls *ls)
 {
        struct rsb_iter *ri;
-        ri = kmalloc(sizeof *ri, GFP_KERNEL);
+        ri = kzalloc(sizeof *ri, GFP_KERNEL);
        if (!ri)
                return NULL;
@@ -260,7 +327,17 @@ static int rsb_seq_show(struct seq_file *file, void *iter_ptr)
 {
        struct rsb_iter *ri = iter_ptr;
-        print_resource(ri->rsb, file);
+        if (ri->locks) {
+                if (ri->header) {
+                        seq_printf(file, "id nodeid remid pid xid exflags flags "
+                                         "sts grmode rqmode time_ms r_nodeid "
+                                         "r_len r_name\n");
+                        ri->header = 0;
+                }
+                print_locks(ri->rsb, file);
+        } else {
+                print_resource(ri->rsb, file);
+        }
        return 0;
 }
@@ -296,6 +373,83 @@ static const struct file_operations rsb_fops = {
 };
 /*
+ * Dump state in compact per-lock listing
+ */
+static struct rsb_iter *locks_iter_init(struct dlm_ls *ls, loff_t *pos)
+{
+        struct rsb_iter *ri;
+        ri = kzalloc(sizeof *ri, GFP_KERNEL);
+        if (!ri)
+                return NULL;
+        ri->ls = ls;
+        ri->entry = 0;
+        ri->next = NULL;
+        ri->locks = 1;
+        if (*pos == 0)
+                ri->header = 1;
+        if (rsb_iter_next(ri)) {
+                rsb_iter_free(ri);
+                return NULL;
+        }
+        return ri;
+}
+static void *locks_seq_start(struct seq_file *file, loff_t *pos)
+{
+        struct rsb_iter *ri;
+        loff_t n = *pos;
+        ri = locks_iter_init(file->private, pos);
+        if (!ri)
+                return NULL;
+        while (n--) {
+                if (rsb_iter_next(ri)) {
+                        rsb_iter_free(ri);
+                        return NULL;
+                }
+        }
+        return ri;
+}
+static struct seq_operations locks_seq_ops = {
+        .start = locks_seq_start,
+        .next  = rsb_seq_next,
+        .stop  = rsb_seq_stop,
+        .show  = rsb_seq_show,
+};
+static int locks_open(struct inode *inode, struct file *file)
+{
+        struct seq_file *seq;
+        int ret;
+        ret = seq_open(file, &locks_seq_ops);
+        if (ret)
+                return ret;
+        seq = file->private_data;
+        seq->private = inode->i_private;
+        return 0;
+}
+static const struct file_operations locks_fops = {
+        .owner   = THIS_MODULE,
+        .open    = locks_open,
+        .read    = seq_read,
+        .llseek  = seq_lseek,
+        .release = seq_release
+};
+/*
 * dump lkb's on the ls_waiters list
 */
@@ -362,6 +516,20 @@ int dlm_create_debug_file(struct dlm_ls *ls)
                return -ENOMEM;
        }
+        memset(name, 0, sizeof(name));
+        snprintf(name, DLM_LOCKSPACE_LEN+8, "%s_locks", ls->ls_name);
+        ls->ls_debug_locks_dentry = debugfs_create_file(name,
+                                                        S_IFREG | S_IRUGO,
+                                                        dlm_root,
+                                                        ls,
+                                                        &locks_fops);
+        if (!ls->ls_debug_locks_dentry) {
+                debugfs_remove(ls->ls_debug_waiters_dentry);
+                debugfs_remove(ls->ls_debug_rsb_dentry);
+                return -ENOMEM;
+        }
        return 0;
 }
@@ -371,6 +539,8 @@ void dlm_delete_debug_file(struct dlm_ls *ls)
                debugfs_remove(ls->ls_debug_rsb_dentry);
        if (ls->ls_debug_waiters_dentry)
                debugfs_remove(ls->ls_debug_waiters_dentry);
+        if (ls->ls_debug_locks_dentry)
+                debugfs_remove(ls->ls_debug_locks_dentry);
 }
 int dlm_register_debugfs(void)
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index 30994d68f6a0..74901e981e10 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -151,6 +151,7 @@ struct dlm_args {
        void                    *bastaddr;
        int                     mode;
        struct dlm_lksb         *lksb;
+        unsigned long           timeout;
 };
@@ -213,6 +214,9 @@ struct dlm_args {
 #define DLM_IFL_OVERLAP_UNLOCK  0x00080000
 #define DLM_IFL_OVERLAP_CANCEL  0x00100000
 #define DLM_IFL_ENDOFLIFE       0x00200000
+#define DLM_IFL_WATCH_TIMEWARN  0x00400000
+#define DLM_IFL_TIMEOUT_CANCEL  0x00800000
+#define DLM_IFL_DEADLOCK_CANCEL 0x01000000
 #define DLM_IFL_USER            0x00000001
 #define DLM_IFL_ORPHAN          0x00000002
@@ -243,6 +247,9 @@ struct dlm_lkb {
        struct list_head        lkb_wait_reply; /* waiting for remote reply */
        struct list_head        lkb_astqueue;   /* need ast to be sent */
        struct list_head        lkb_ownqueue;   /* list of locks for a process */
+        struct list_head        lkb_time_list;
+        unsigned long           lkb_timestamp;
+        unsigned long           lkb_timeout_cs;
        char                    *lkb_lvbptr;
        struct dlm_lksb         *lkb_lksb;      /* caller's status block */
@@ -447,12 +454,16 @@ struct dlm_ls {
        struct mutex            ls_orphans_mutex;
        struct list_head        ls_orphans;
+        struct mutex            ls_timeout_mutex;
+        struct list_head        ls_timeout;
        struct list_head        ls_nodes;       /* current nodes in ls */
        struct list_head        ls_nodes_gone;  /* dead node list, recovery */
        int                     ls_num_nodes;   /* number of nodes in ls */
        int                     ls_low_nodeid;
        int                     ls_total_weight;
        int                     *ls_node_array;
+        gfp_t                   ls_allocation;
        struct dlm_rsb          ls_stub_rsb;    /* for returning errors */
        struct dlm_lkb          ls_stub_lkb;    /* for returning errors */
@@ -460,9 +471,12 @@ struct dlm_ls {
        struct dentry           *ls_debug_rsb_dentry; /* debugfs */
        struct dentry           *ls_debug_waiters_dentry; /* debugfs */
+        struct dentry           *ls_debug_locks_dentry; /* debugfs */
        wait_queue_head_t       ls_uevent_wait; /* user part of join/leave */
        int                     ls_uevent_result;
+        struct completion       ls_members_done;
+        int                     ls_members_result;
        struct miscdevice       ls_device;
@@ -472,6 +486,7 @@ struct dlm_ls {
        struct task_struct      *ls_recoverd_task;
        struct mutex            ls_recoverd_active;
        spinlock_t              ls_recover_lock;
+        unsigned long           ls_recover_begin; /* jiffies timestamp */
        uint32_t                ls_recover_status; /* DLM_RS_ */
        uint64_t                ls_recover_seq;
        struct dlm_recover      *ls_recover_args;
@@ -501,6 +516,7 @@ struct dlm_ls {
 #define LSFL_RCOM_READY         3
 #define LSFL_RCOM_WAIT          4
 #define LSFL_UEVENT_WAIT        5
+#define LSFL_TIMEWARN           6
 /* much of this is just saving user space pointers associated with the
   lock that we pass back to the user lib with an ast */
@@ -518,6 +534,7 @@ struct dlm_user_args {
        void __user             *castaddr;
        void __user             *bastparam;
        void __user             *bastaddr;
+        uint64_t                xid;
 };
 #define DLM_PROC_FLAGS_CLOSING 1
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index d8d6e729f96b..b455919c1998 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -82,10 +82,13 @@ static int send_bast(struct dlm_rsb *r, struct dlm_lkb *lkb, int mode);
 static int send_lookup(struct dlm_rsb *r, struct dlm_lkb *lkb);
 static int send_remove(struct dlm_rsb *r);
 static int _request_lock(struct dlm_rsb *r, struct dlm_lkb *lkb);
+static int _cancel_lock(struct dlm_rsb *r, struct dlm_lkb *lkb);
 static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
                                    struct dlm_message *ms);
 static int receive_extralen(struct dlm_message *ms);
 static void do_purge(struct dlm_ls *ls, int nodeid, int pid);
+static void del_timeout(struct dlm_lkb *lkb);
+void dlm_timeout_warn(struct dlm_lkb *lkb);
 /*
 * Lock compatibilty matrix - thanks Steve
@@ -194,17 +197,17 @@ void dlm_dump_rsb(struct dlm_rsb *r)
 /* Threads cannot use the lockspace while it's being recovered */
-static inline void lock_recovery(struct dlm_ls *ls)
+static inline void dlm_lock_recovery(struct dlm_ls *ls)
 {
        down_read(&ls->ls_in_recovery);
 }
-static inline void unlock_recovery(struct dlm_ls *ls)
+void dlm_unlock_recovery(struct dlm_ls *ls)
 {
        up_read(&ls->ls_in_recovery);
 }
-static inline int lock_recovery_try(struct dlm_ls *ls)
+int dlm_lock_recovery_try(struct dlm_ls *ls)
 {
        return down_read_trylock(&ls->ls_in_recovery);
 }
@@ -286,8 +289,22 @@ static void queue_cast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv)
        if (is_master_copy(lkb))
                return;
+        del_timeout(lkb);
        DLM_ASSERT(lkb->lkb_lksb, dlm_print_lkb(lkb););
+        /* a plain cancel returns -DLM_ECANCEL; a cancel caused by a timeout
+           returns -ETIMEDOUT and one flagged DEADLOCK_CANCEL -EDEADLK */
+        if (rv == -DLM_ECANCEL && (lkb->lkb_flags & DLM_IFL_TIMEOUT_CANCEL)) {
+                lkb->lkb_flags &= ~DLM_IFL_TIMEOUT_CANCEL;
+                rv = -ETIMEDOUT;
+        }
+        if (rv == -DLM_ECANCEL && (lkb->lkb_flags & DLM_IFL_DEADLOCK_CANCEL)) {
+                lkb->lkb_flags &= ~DLM_IFL_DEADLOCK_CANCEL;
+                rv = -EDEADLK;
+        }
        lkb->lkb_lksb->sb_status = rv;
        lkb->lkb_lksb->sb_flags = lkb->lkb_sbflags;
@@ -581,6 +598,7 @@ static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret)
        kref_init(&lkb->lkb_ref);
        INIT_LIST_HEAD(&lkb->lkb_ownqueue);
        INIT_LIST_HEAD(&lkb->lkb_rsb_lookup);
+        INIT_LIST_HEAD(&lkb->lkb_time_list);
        get_random_bytes(&bucket, sizeof(bucket));
        bucket &= (ls->ls_lkbtbl_size - 1);
@@ -985,15 +1003,136 @@ void dlm_scan_rsbs(struct dlm_ls *ls)
 {
        int i;
-        if (dlm_locking_stopped(ls))
-                return;
        for (i = 0; i < ls->ls_rsbtbl_size; i++) {
                shrink_bucket(ls, i);
+                if (dlm_locking_stopped(ls))
+                        break;
                cond_resched();
        }
 }
+static void add_timeout(struct dlm_lkb *lkb)
+{
+        struct dlm_ls *ls = lkb->lkb_resource->res_ls;
+        if (is_master_copy(lkb)) {
+                lkb->lkb_timestamp = jiffies;
+                return;
+        }
+        if (test_bit(LSFL_TIMEWARN, &ls->ls_flags) &&
+            !(lkb->lkb_exflags & DLM_LKF_NODLCKWT)) {
+                lkb->lkb_flags |= DLM_IFL_WATCH_TIMEWARN;
+                goto add_it;
+        }
+        if (lkb->lkb_exflags & DLM_LKF_TIMEOUT)
+                goto add_it;
+        return;
+ add_it:
+        DLM_ASSERT(list_empty(&lkb->lkb_time_list), dlm_print_lkb(lkb););
+        mutex_lock(&ls->ls_timeout_mutex);
+        hold_lkb(lkb);
+        lkb->lkb_timestamp = jiffies;
+        list_add_tail(&lkb->lkb_time_list, &ls->ls_timeout);
+        mutex_unlock(&ls->ls_timeout_mutex);
+}
+static void del_timeout(struct dlm_lkb *lkb)
+{
+        struct dlm_ls *ls = lkb->lkb_resource->res_ls;
+        mutex_lock(&ls->ls_timeout_mutex);
+        if (!list_empty(&lkb->lkb_time_list)) {
+                list_del_init(&lkb->lkb_time_list);
+                unhold_lkb(lkb);
+        }
+        mutex_unlock(&ls->ls_timeout_mutex);
+}
+/* FIXME: is it safe to look at lkb_exflags, lkb_flags, lkb_timestamp, and
+   lkb_timeout_cs without lock_rsb?  Note: we can't lock timeout_mutex
+   and then lock rsb because of lock ordering in add_timeout.  We may need
+   to specify some special timeout-related bits in the lkb that are just to
+   be accessed under the timeout_mutex. */
+void dlm_scan_timeout(struct dlm_ls *ls)
+{
+        struct dlm_rsb *r;
+        struct dlm_lkb *lkb;
+        int do_cancel, do_warn;
+        for (;;) {
+                if (dlm_locking_stopped(ls))
+                        break;
+                do_cancel = 0;
+                do_warn = 0;
+                mutex_lock(&ls->ls_timeout_mutex);
+                list_for_each_entry(lkb, &ls->ls_timeout, lkb_time_list) {
+                        if ((lkb->lkb_exflags & DLM_LKF_TIMEOUT) &&
+                            time_after_eq(jiffies, lkb->lkb_timestamp +
+                                          lkb->lkb_timeout_cs * HZ/100))
+                                do_cancel = 1;
+                        if ((lkb->lkb_flags & DLM_IFL_WATCH_TIMEWARN) &&
+                            time_after_eq(jiffies, lkb->lkb_timestamp +
+                                           dlm_config.ci_timewarn_cs * HZ/100))
+                                do_warn = 1;
+                        if (!do_cancel && !do_warn)
+                                continue;
+                        hold_lkb(lkb);
+                        break;
+                }
+                mutex_unlock(&ls->ls_timeout_mutex);
+                if (!do_cancel && !do_warn)
+                        break;
+                r = lkb->lkb_resource;
+                hold_rsb(r);
+                lock_rsb(r);
+                if (do_warn) {
+                        /* clear flag so we only warn once */
+                        lkb->lkb_flags &= ~DLM_IFL_WATCH_TIMEWARN;
+                        if (!(lkb->lkb_exflags & DLM_LKF_TIMEOUT))
+                                del_timeout(lkb);
+                        dlm_timeout_warn(lkb);
+                }
+                if (do_cancel) {
+                        log_debug(ls, "timeout cancel %x node %d %s",
+                                  lkb->lkb_id, lkb->lkb_nodeid, r->res_name);
+                        lkb->lkb_flags &= ~DLM_IFL_WATCH_TIMEWARN;
+                        lkb->lkb_flags |= DLM_IFL_TIMEOUT_CANCEL;
+                        del_timeout(lkb);
+                        _cancel_lock(r, lkb);
+                }
+                unlock_rsb(r);
+                unhold_rsb(r);
+                dlm_put_lkb(lkb);
+        }
+}
+/* This is only called by dlm_recoverd, and we rely on dlm_ls_stop() stopping
+   dlm_recoverd before checking/setting ls_recover_begin. */
+void dlm_adjust_timeouts(struct dlm_ls *ls)
+{
+        struct dlm_lkb *lkb;
+        long adj = jiffies - ls->ls_recover_begin;
+        ls->ls_recover_begin = 0;
+        mutex_lock(&ls->ls_timeout_mutex);
+        list_for_each_entry(lkb, &ls->ls_timeout, lkb_time_list)
+                lkb->lkb_timestamp += adj;
+        mutex_unlock(&ls->ls_timeout_mutex);
+}
 /* lkb is master or local copy */
 static void set_lvb_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
@@ -1275,10 +1414,8 @@ static int queue_conflict(struct list_head *head, struct dlm_lkb *lkb)
 * queue for one resource.  The granted mode of each lock blocks the requested
 * mode of the other lock."
 *
- * Part 2: if the granted mode of lkb is preventing the first lkb in the
+ * Part 2: if the granted mode of lkb is preventing an earlier lkb in the
- * convert queue from being granted, then demote lkb (set grmode to NL).
+ * convert queue from being granted, then deadlk/demote lkb.
- * This second form requires that we check for conv-deadlk even when
- * now == 0 in _can_be_granted().
 *
 * Example:
 * Granted Queue: empty
@@ -1287,41 +1424,52 @@ static int queue_conflict(struct list_head *head, struct dlm_lkb *lkb)
 *
 * The first lock can't be granted because of the granted mode of the second
 * lock and the second lock can't be granted because it's not first in the
- * list.  We demote the granted mode of the second lock (the lkb passed to this
+ * list.  We either cancel lkb's conversion (PR->EX) and return EDEADLK, or we
- * function).
+ * demote the granted mode of lkb (from PR to NL) if it has the CONVDEADLK
+ * flag set and return DEMOTED in the lksb flags.
+ *
+ * Originally, this function detected conv-deadlk in a more limited scope:
+ * - if !modes_compat(lkb1, lkb2) && !modes_compat(lkb2, lkb1), or
+ * - if lkb1 was the first entry in the queue (not just earlier), and was
+ *   blocked by the granted mode of lkb2, and there was nothing on the
+ *   granted queue preventing lkb1 from being granted immediately, i.e.
+ *   lkb2 was the only thing preventing lkb1 from being granted.
+ *
+ * That second condition meant we'd only say there was conv-deadlk if
+ * resolving it (by demotion) would lead to the first lock on the convert
+ * queue being granted right away.  It allowed conversion deadlocks to exist
+ * between locks on the convert queue while they couldn't be granted anyway.
 *
- * After the resolution, the "grant pending" function needs to go back and try
+ * Now, we detect and take action on conversion deadlocks immediately when
- * to grant locks on the convert queue again since the first lock can now be
+ * they're created, even if they may not be immediately consequential.  If
- * granted.
+ * lkb1 exists anywhere in the convert queue and lkb2 comes in with a granted
+ * mode that would prevent lkb1's conversion from being granted, we do a
+ * deadlk/demote on lkb2 right away and don't let it onto the convert queue.
+ * I think this means that the lkb_is_ahead condition below should always
+ * be zero, i.e. there will never be conv-deadlk between two locks that are
+ * both already on the convert queue.
 */
-static int conversion_deadlock_detect(struct dlm_rsb *rsb, struct dlm_lkb *lkb)
+static int conversion_deadlock_detect(struct dlm_rsb *r, struct dlm_lkb *lkb2)
 {
-        struct dlm_lkb *this, *first = NULL, *self = NULL;
+        struct dlm_lkb *lkb1;
+        int lkb_is_ahead = 0;
-        list_for_each_entry(this, &rsb->res_convertqueue, lkb_statequeue) {
+        list_for_each_entry(lkb1, &r->res_convertqueue, lkb_statequeue) {
-                if (!first)
+                if (lkb1 == lkb2) {
-                        first = this;
+                        lkb_is_ahead = 1;
-                if (this == lkb) {
-                        self = lkb;
                        continue;
                }
-                if (!modes_compat(this, lkb) && !modes_compat(lkb, this))
+                if (!lkb_is_ahead) {
-                        return 1;
+                        if (!modes_compat(lkb2, lkb1))
-        }
+                                return 1;
+                } else {
-        /* if lkb is on the convert queue and is preventing the first
+                        if (!modes_compat(lkb2, lkb1) &&
-           from being granted, then there's deadlock and we demote lkb.
+                            !modes_compat(lkb1, lkb2))
-           multiple converting locks may need to do this before the first
+                                return 1;
-           converting lock can be granted. */
+                }
-        if (self && self != first) {
-                if (!modes_compat(lkb, first) &&
-                    !queue_conflict(&rsb->res_grantqueue, first))
-                        return 1;
        }
        return 0;
 }
@@ -1450,42 +1598,57 @@ static int _can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now)
        if (!now && !conv && list_empty(&r->res_convertqueue) &&
            first_in_list(lkb, &r->res_waitqueue))
                return 1;
 out:
-        /*
-         * The following, enabled by CONVDEADLK, departs from VMS.
-         */
-        if (conv && (lkb->lkb_exflags & DLM_LKF_CONVDEADLK) &&
-            conversion_deadlock_detect(r, lkb)) {
-                lkb->lkb_grmode = DLM_LOCK_NL;
-                lkb->lkb_sbflags |= DLM_SBF_DEMOTED;
-        }
        return 0;
 }
-/*
+static int can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now,
- * The ALTPR and ALTCW flags aren't traditional lock manager flags, but are a
+                          int *err)
- * simple way to provide a big optimization to applications that can use them.
- */
-static int can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now)
 {
-        uint32_t flags = lkb->lkb_exflags;
        int rv;
        int8_t alt = 0, rqmode = lkb->lkb_rqmode;
+        int8_t is_convert = (lkb->lkb_grmode != DLM_LOCK_IV);
+        if (err)
+                *err = 0;
        rv = _can_be_granted(r, lkb, now);
        if (rv)
                goto out;
-        if (lkb->lkb_sbflags & DLM_SBF_DEMOTED)
+        /*
+         * The CONVDEADLK flag is non-standard and tells the dlm to resolve
+         * conversion deadlocks by demoting grmode to NL, otherwise the dlm
+         * cancels one of the locks.
+         */
+        if (is_convert && can_be_queued(lkb) &&
+            conversion_deadlock_detect(r, lkb)) {
+                if (lkb->lkb_exflags & DLM_LKF_CONVDEADLK) {
+                        lkb->lkb_grmode = DLM_LOCK_NL;
+                        lkb->lkb_sbflags |= DLM_SBF_DEMOTED;
+                } else if (!(lkb->lkb_exflags & DLM_LKF_NODLCKWT)) {
+                        if (err)
+                                *err = -EDEADLK;
+                        else {
+                                log_print("can_be_granted deadlock %x now %d",
+                                          lkb->lkb_id, now);
+                                dlm_dump_rsb(r);
+                        }
+                }
                goto out;
+        }
-        if (rqmode != DLM_LOCK_PR && flags & DLM_LKF_ALTPR)
+        /*
+         * The ALTPR and ALTCW flags are non-standard and tell the dlm to try
+         * to grant a request in a mode other than the normal rqmode.  It's a
+         * simple way to provide a big optimization to applications that can
+         * use them.
+         */
+        if (rqmode != DLM_LOCK_PR && (lkb->lkb_exflags & DLM_LKF_ALTPR))
                alt = DLM_LOCK_PR;
-        else if (rqmode != DLM_LOCK_CW && flags & DLM_LKF_ALTCW)
+        else if (rqmode != DLM_LOCK_CW && (lkb->lkb_exflags & DLM_LKF_ALTCW))
                alt = DLM_LOCK_CW;
        if (alt) {
@@ -1500,10 +1663,20 @@ static int can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now)
        return rv;
 }
+/* FIXME: I don't think that can_be_granted() can/will demote or find deadlock
+   for locks pending on the convert list.  Once verified (watch for these
+   log_prints), we should be able to just call _can_be_granted() and not
+   bother with the demote/deadlk cases here (and there's no easy way to deal
+   with a deadlk here, we'd have to generate something like grant_lock with
+   the deadlk error.) */
+/* returns the highest requested mode of all blocked conversions */
 static int grant_pending_convert(struct dlm_rsb *r, int high)
 {
        struct dlm_lkb *lkb, *s;
        int hi, demoted, quit, grant_restart, demote_restart;
+        int deadlk;
        quit = 0;
 restart:
@@ -1513,14 +1686,29 @@ static int grant_pending_convert(struct dlm_rsb *r, int high)
        list_for_each_entry_safe(lkb, s, &r->res_convertqueue, lkb_statequeue) {
                demoted = is_demoted(lkb);
-                if (can_be_granted(r, lkb, 0)) {
+                deadlk = 0;
+                if (can_be_granted(r, lkb, 0, &deadlk)) {
                        grant_lock_pending(r, lkb);
                        grant_restart = 1;
-                } else {
+                        continue;
-                        hi = max_t(int, lkb->lkb_rqmode, hi);
-                        if (!demoted && is_demoted(lkb))
-                                demote_restart = 1;
                }
+                if (!demoted && is_demoted(lkb)) {
+                        log_print("WARN: pending demoted %x node %d %s",
+                                  lkb->lkb_id, lkb->lkb_nodeid, r->res_name);
+                        demote_restart = 1;
+                        continue;
+                }
+                if (deadlk) {
+                        log_print("WARN: pending deadlock %x node %d %s",
+                                  lkb->lkb_id, lkb->lkb_nodeid, r->res_name);
+                        dlm_dump_rsb(r);
+                        continue;
+                }
+                hi = max_t(int, lkb->lkb_rqmode, hi);
        }
        if (grant_restart)
@@ -1538,7 +1726,7 @@ static int grant_pending_wait(struct dlm_rsb *r, int high)
        struct dlm_lkb *lkb, *s;
        list_for_each_entry_safe(lkb, s, &r->res_waitqueue, lkb_statequeue) {
-                if (can_be_granted(r, lkb, 0))
+                if (can_be_granted(r, lkb, 0, NULL))
                        grant_lock_pending(r, lkb);
                else
                        high = max_t(int, lkb->lkb_rqmode, high);
@@ -1733,7 +1921,7 @@ static void confirm_master(struct dlm_rsb *r, int error)
 }
 static int set_lock_args(int mode, struct dlm_lksb *lksb, uint32_t flags,
-                         int namelen, uint32_t parent_lkid, void *ast,
+                         int namelen, unsigned long timeout_cs, void *ast,
                         void *astarg, void *bast, struct dlm_args *args)
 {
        int rv = -EINVAL;
@@ -1776,10 +1964,6 @@ static int set_lock_args(int mode, struct dlm_lksb *lksb, uint32_t flags,
        if (flags & DLM_LKF_VALBLK && !lksb->sb_lvbptr)
                goto out;
-        /* parent/child locks not yet supported */
-        if (parent_lkid)
-                goto out;
        if (flags & DLM_LKF_CONVERT && !lksb->sb_lkid)
                goto out;
@@ -1791,6 +1975,7 @@ static int set_lock_args(int mode, struct dlm_lksb *lksb, uint32_t flags,
        args->astaddr = ast;
        args->astparam = (long) astarg;
        args->bastaddr = bast;
+        args->timeout = timeout_cs;
        args->mode = mode;
        args->lksb = lksb;
        rv = 0;
@@ -1845,6 +2030,7 @@ static int validate_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
        lkb->lkb_lksb = args->lksb;
        lkb->lkb_lvbptr = args->lksb->sb_lvbptr;
        lkb->lkb_ownpid = (int) current->pid;
+        lkb->lkb_timeout_cs = args->timeout;
        rv = 0;
 out:
        return rv;
@@ -1903,6 +2089,9 @@ static int validate_unlock_args(struct dlm_lkb *lkb, struct dlm_args *args)
                if (is_overlap(lkb))
                        goto out;
+                /* don't let scand try to do a cancel */
+                del_timeout(lkb);
                if (lkb->lkb_flags & DLM_IFL_RESEND) {
                        lkb->lkb_flags |= DLM_IFL_OVERLAP_CANCEL;
                        rv = -EBUSY;
@@ -1934,6 +2123,9 @@ static int validate_unlock_args(struct dlm_lkb *lkb, struct dlm_args *args)
                if (is_overlap_unlock(lkb))
                        goto out;
+                /* don't let scand try to do a cancel */
+                del_timeout(lkb);
                if (lkb->lkb_flags & DLM_IFL_RESEND) {
                        lkb->lkb_flags |= DLM_IFL_OVERLAP_UNLOCK;
                        rv = -EBUSY;
@@ -1984,7 +2176,7 @@ static int do_request(struct dlm_rsb *r, struct dlm_lkb *lkb)
 {
        int error = 0;
-        if (can_be_granted(r, lkb, 1)) {
+        if (can_be_granted(r, lkb, 1, NULL)) {
                grant_lock(r, lkb);
                queue_cast(r, lkb, 0);
                goto out;
@@ -1994,6 +2186,7 @@ static int do_request(struct dlm_rsb *r, struct dlm_lkb *lkb)
                error = -EINPROGRESS;
                add_lkb(r, lkb, DLM_LKSTS_WAITING);
                send_blocking_asts(r, lkb);
+                add_timeout(lkb);
                goto out;
        }
@@ -2009,16 +2202,32 @@ static int do_request(struct dlm_rsb *r, struct dlm_lkb *lkb)
 static int do_convert(struct dlm_rsb *r, struct dlm_lkb *lkb)
 {
        int error = 0;
+        int deadlk = 0;
        /* changing an existing lock may allow others to be granted */
-        if (can_be_granted(r, lkb, 1)) {
+        if (can_be_granted(r, lkb, 1, &deadlk)) {
                grant_lock(r, lkb);
                queue_cast(r, lkb, 0);
                grant_pending_locks(r);
                goto out;
        }
+        /* can_be_granted() detected that this lock would block in a conversion
+           deadlock, so we leave it on the granted queue and return EDEADLK in
+           the ast for the convert. */
+        if (deadlk) {
+                /* it's left on the granted queue */
+                log_debug(r->res_ls, "deadlock %x node %d sts%d g%d r%d %s",
+                          lkb->lkb_id, lkb->lkb_nodeid, lkb->lkb_status,
+                          lkb->lkb_grmode, lkb->lkb_rqmode, r->res_name);
+                revert_lock(r, lkb);
+                queue_cast(r, lkb, -EDEADLK);
+                error = -EDEADLK;
+                goto out;
+        }
        /* is_demoted() means the can_be_granted() above set the grmode
           to NL, and left us on the granted queue.  This auto-demotion
           (due to CONVDEADLK) might mean other locks, and/or this lock, are
@@ -2041,6 +2250,7 @@ static int do_convert(struct dlm_rsb *r, struct dlm_lkb *lkb)
                del_lkb(r, lkb);
                add_lkb(r, lkb, DLM_LKSTS_CONVERT);
                send_blocking_asts(r, lkb);
+                add_timeout(lkb);
                goto out;
        }
@@ -2274,7 +2484,7 @@ int dlm_lock(dlm_lockspace_t *lockspace,
        if (!ls)
                return -EINVAL;
-        lock_recovery(ls);
+        dlm_lock_recovery(ls);
        if (convert)
                error = find_lkb(ls, lksb->sb_lkid, &lkb);
@@ -2284,7 +2494,7 @@ int dlm_lock(dlm_lockspace_t *lockspace,
        if (error)
                goto out;
-        error = set_lock_args(mode, lksb, flags, namelen, parent_lkid, ast,
+        error = set_lock_args(mode, lksb, flags, namelen, 0, ast,
                              astarg, bast, &args);
        if (error)
                goto out_put;
@@ -2299,10 +2509,10 @@ int dlm_lock(dlm_lockspace_t *lockspace,
 out_put:
        if (convert || error)
                __put_lkb(ls, lkb);
-        if (error == -EAGAIN)
+        if (error == -EAGAIN || error == -EDEADLK)
                error = 0;
 out:
-        unlock_recovery(ls);
+        dlm_unlock_recovery(ls);
        dlm_put_lockspace(ls);
        return error;
 }
@@ -2322,7 +2532,7 @@ int dlm_unlock(dlm_lockspace_t *lockspace,
        if (!ls)
                return -EINVAL;
-        lock_recovery(ls);
+        dlm_lock_recovery(ls);
        error = find_lkb(ls, lkid, &lkb);
        if (error)
@@ -2344,7 +2554,7 @@ int dlm_unlock(dlm_lockspace_t *lockspace,
 out_put:
        dlm_put_lkb(lkb);
 out:
-        unlock_recovery(ls);
+        dlm_unlock_recovery(ls);
        dlm_put_lockspace(ls);
        return error;
 }
@@ -2384,7 +2594,7 @@ static int _create_message(struct dlm_ls *ls, int mb_len,
           pass into lowcomms_commit and a message buffer (mb) that we
           write our data into */
-        mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, GFP_KERNEL, &mb);
+        mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, ls->ls_allocation, &mb);
        if (!mh)
                return -ENOBUFS;
@@ -3111,9 +3321,10 @@ static void receive_request_reply(struct dlm_ls *ls, struct dlm_message *ms)
                lkb->lkb_remid = ms->m_lkid;
                if (is_altmode(lkb))
                        munge_altmode(lkb, ms);
-                if (result)
+                if (result) {
                        add_lkb(r, lkb, DLM_LKSTS_WAITING);
-                else {
+                        add_timeout(lkb);
+                } else {
                        grant_lock_pc(r, lkb, ms);
                        queue_cast(r, lkb, 0);
                }
@@ -3172,6 +3383,12 @@ static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
                queue_cast(r, lkb, -EAGAIN);
                break;
+        case -EDEADLK:
+                receive_flags_reply(lkb, ms);
+                revert_lock_pc(r, lkb);
+                queue_cast(r, lkb, -EDEADLK);
+                break;
        case -EINPROGRESS:
                /* convert was queued on remote master */
                receive_flags_reply(lkb, ms);
@@ -3179,6 +3396,7 @@ static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
                        munge_demoted(lkb, ms);
                del_lkb(r, lkb);
                add_lkb(r, lkb, DLM_LKSTS_CONVERT);
+                add_timeout(lkb);
                break;
        case 0:
@@ -3298,8 +3516,7 @@ static void _receive_cancel_reply(struct dlm_lkb *lkb, struct dlm_message *ms)
        case -DLM_ECANCEL:
                receive_flags_reply(lkb, ms);
                revert_lock_pc(r, lkb);
-                if (ms->m_result)
+                queue_cast(r, lkb, -DLM_ECANCEL);
-                        queue_cast(r, lkb, -DLM_ECANCEL);
                break;
        case 0:
                break;
@@ -3424,7 +3641,7 @@ int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery)
                        }
                }
-                if (lock_recovery_try(ls))
+                if (dlm_lock_recovery_try(ls))
                        break;
                schedule();
        }
@@ -3503,7 +3720,7 @@ int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery)
                log_error(ls, "unknown message type %d", ms->m_type);
        }
-        unlock_recovery(ls);
+        dlm_unlock_recovery(ls);
 out:
        dlm_put_lockspace(ls);
        dlm_astd_wake();
@@ -4034,13 +4251,13 @@ int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc)
 int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
                     int mode, uint32_t flags, void *name, unsigned int namelen,
-                     uint32_t parent_lkid)
+                     unsigned long timeout_cs)
 {
        struct dlm_lkb *lkb;
        struct dlm_args args;
        int error;
-        lock_recovery(ls);
+        dlm_lock_recovery(ls);
        error = create_lkb(ls, &lkb);
        if (error) {
@@ -4062,7 +4279,7 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
           When DLM_IFL_USER is set, the dlm knows that this is a userspace
           lock and that lkb_astparam is the dlm_user_args structure. */
-        error = set_lock_args(mode, &ua->lksb, flags, namelen, parent_lkid,
+        error = set_lock_args(mode, &ua->lksb, flags, namelen, timeout_cs,
                              DLM_FAKE_USER_AST, ua, DLM_FAKE_USER_AST, &args);
        lkb->lkb_flags |= DLM_IFL_USER;
        ua->old_mode = DLM_LOCK_IV;
@@ -4094,19 +4311,20 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
        list_add_tail(&lkb->lkb_ownqueue, &ua->proc->locks);
        spin_unlock(&ua->proc->locks_spin);
 out:
-        unlock_recovery(ls);
+        dlm_unlock_recovery(ls);
        return error;
 }
 int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
-                     int mode, uint32_t flags, uint32_t lkid, char *lvb_in)
+                     int mode, uint32_t flags, uint32_t lkid, char *lvb_in,
+                     unsigned long timeout_cs)
 {
        struct dlm_lkb *lkb;
        struct dlm_args args;
        struct dlm_user_args *ua;
        int error;
-        lock_recovery(ls);
+        dlm_lock_recovery(ls);
        error = find_lkb(ls, lkid, &lkb);
        if (error)
@@ -4127,6 +4345,7 @@ int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
        if (lvb_in && ua->lksb.sb_lvbptr)
                memcpy(ua->lksb.sb_lvbptr, lvb_in, DLM_USER_LVB_LEN);
+        ua->xid = ua_tmp->xid;
        ua->castparam = ua_tmp->castparam;
        ua->castaddr = ua_tmp->castaddr;
        ua->bastparam = ua_tmp->bastparam;
@@ -4134,19 +4353,19 @@ int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
        ua->user_lksb = ua_tmp->user_lksb;
        ua->old_mode = lkb->lkb_grmode;
-        error = set_lock_args(mode, &ua->lksb, flags, 0, 0, DLM_FAKE_USER_AST,
+        error = set_lock_args(mode, &ua->lksb, flags, 0, timeout_cs,
-                              ua, DLM_FAKE_USER_AST, &args);
+                              DLM_FAKE_USER_AST, ua, DLM_FAKE_USER_AST, &args);
        if (error)
                goto out_put;
        error = convert_lock(ls, lkb, &args);
-        if (error == -EINPROGRESS || error == -EAGAIN)
+        if (error == -EINPROGRESS || error == -EAGAIN || error == -EDEADLK)
                error = 0;
 out_put:
        dlm_put_lkb(lkb);
 out:
-        unlock_recovery(ls);
+        dlm_unlock_recovery(ls);
        kfree(ua_tmp);
        return error;
 }
@@ -4159,7 +4378,7 @@ int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
        struct dlm_user_args *ua;
        int error;
-        lock_recovery(ls);
+        dlm_lock_recovery(ls);
        error = find_lkb(ls, lkid, &lkb);
        if (error)
@@ -4194,7 +4413,7 @@ int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
 out_put:
        dlm_put_lkb(lkb);
 out:
-        unlock_recovery(ls);
+        dlm_unlock_recovery(ls);
        kfree(ua_tmp);
        return error;
 }
@@ -4207,7 +4426,7 @@ int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
        struct dlm_user_args *ua;
        int error;
-        lock_recovery(ls);
+        dlm_lock_recovery(ls);
        error = find_lkb(ls, lkid, &lkb);
        if (error)
@@ -4231,11 +4450,59 @@ int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
 out_put:
        dlm_put_lkb(lkb);
 out:
-        unlock_recovery(ls);
+        dlm_unlock_recovery(ls);
        kfree(ua_tmp);
        return error;
 }
+int dlm_user_deadlock(struct dlm_ls *ls, uint32_t flags, uint32_t lkid)
+{       /* Cancel the lock identified by lkid in lockspace ls, setting
+           DLM_IFL_DEADLOCK_CANCEL on the lkb before calling _cancel_lock().
+           Returns 0 on success (-DLM_ECANCEL and -EBUSY are also reported
+           as 0), otherwise the error from find_lkb(), set_unlock_args(),
+           validate_unlock_args() or _cancel_lock(). */
+        struct dlm_lkb *lkb;
+        struct dlm_args args;
+        struct dlm_user_args *ua;
+        struct dlm_rsb *r;
+        int error;
+        dlm_lock_recovery(ls);  /* released at "out" on every path */
+        error = find_lkb(ls, lkid, &lkb);
+        if (error)
+                goto out;
+        ua = (struct dlm_user_args *)lkb->lkb_astparam;  /* assumes a DLM_IFL_USER lkb -- TODO confirm */
+        error = set_unlock_args(flags, ua, &args);
+        if (error)
+                goto out_put;
+        /* same as cancel_lock(), but set DEADLOCK_CANCEL after lock_rsb */
+        r = lkb->lkb_resource;
+        hold_rsb(r);
+        lock_rsb(r);
+        error = validate_unlock_args(lkb, &args);
+        if (error)
+                goto out_r;  /* flag is not set and _cancel_lock() is skipped */
+        lkb->lkb_flags |= DLM_IFL_DEADLOCK_CANCEL;
+        error = _cancel_lock(r, lkb);
+ out_r:
+        unlock_rsb(r);
+        put_rsb(r);  /* presumably drops the reference taken by hold_rsb() above */
+        if (error == -DLM_ECANCEL)  /* a cancel result is reported to the caller as success */
+                error = 0;
+        /* from validate_unlock_args(); also reported to the caller as success */
+        if (error == -EBUSY)
+                error = 0;
+ out_put:
+        dlm_put_lkb(lkb);  /* presumably drops the reference obtained via find_lkb() -- confirm */
+ out:
+        dlm_unlock_recovery(ls);
+        return error;
+}
 /* lkb's that are removed from the waiters list by revert are just left on the
   orphans list with the granted orphan locks, to be freed by purge */
@@ -4314,12 +4581,13 @@ void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
 {
        struct dlm_lkb *lkb, *safe;
-        lock_recovery(ls);
+        dlm_lock_recovery(ls);
        while (1) {
                lkb = del_proc_lock(ls, proc);
                if (!lkb)
                        break;
+                del_timeout(lkb);
                if (lkb->lkb_exflags & DLM_LKF_PERSISTENT)
                        orphan_proc_lock(ls, lkb);
                else
@@ -4347,7 +4615,7 @@ void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
        }
        mutex_unlock(&ls->ls_clear_proc_locks);
-        unlock_recovery(ls);
+        dlm_unlock_recovery(ls);
 }
 static void purge_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
@@ -4429,12 +4697,12 @@ int dlm_user_purge(struct dlm_ls *ls, struct dlm_user_proc *proc,
        if (nodeid != dlm_our_nodeid()) {
                error = send_purge(ls, nodeid, pid);
        } else {
-                lock_recovery(ls);
+                dlm_lock_recovery(ls);
                if (pid == current->pid)
                        purge_proc_locks(ls, proc);
                else
                        do_purge(ls, nodeid, pid);
-                unlock_recovery(ls);
+                dlm_unlock_recovery(ls);
        }
        return error;
 }
diff --git a/fs/dlm/lock.h b/fs/dlm/lock.h
index 64fc4ec40668..1720313c22df 100644
--- a/fs/dlm/lock.h
+++ b/fs/dlm/lock.h
@@ -1,7 +1,7 @@
 /******************************************************************************
 *******************************************************************************
 **
-**  Copyright (C) 2005 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2005-2007 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -24,6 +24,10 @@ void dlm_put_rsb(struct dlm_rsb *r);
 void dlm_hold_rsb(struct dlm_rsb *r);
 int dlm_put_lkb(struct dlm_lkb *lkb);
 void dlm_scan_rsbs(struct dlm_ls *ls);
+int dlm_lock_recovery_try(struct dlm_ls *ls);
+void dlm_unlock_recovery(struct dlm_ls *ls);
+void dlm_scan_timeout(struct dlm_ls *ls);
+void dlm_adjust_timeouts(struct dlm_ls *ls);
 int dlm_purge_locks(struct dlm_ls *ls);
 void dlm_purge_mstcpy_locks(struct dlm_rsb *r);
@@ -34,15 +38,18 @@ int dlm_recover_master_copy(struct dlm_ls *ls, struct dlm_rcom *rc);
 int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc);
 int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua, int mode,
-        uint32_t flags, void *name, unsigned int namelen, uint32_t parent_lkid);
+        uint32_t flags, void *name, unsigned int namelen,
+        unsigned long timeout_cs);
 int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
-        int mode, uint32_t flags, uint32_t lkid, char *lvb_in);
+        int mode, uint32_t flags, uint32_t lkid, char *lvb_in,
+        unsigned long timeout_cs);
 int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
        uint32_t flags, uint32_t lkid, char *lvb_in);
 int dlm_user_cancel(struct dlm_ls *ls,  struct dlm_user_args *ua_tmp,
        uint32_t flags, uint32_t lkid);
 int dlm_user_purge(struct dlm_ls *ls, struct dlm_user_proc *proc,
        int nodeid, int pid);
+int dlm_user_deadlock(struct dlm_ls *ls, uint32_t flags, uint32_t lkid);
 void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc);
 static inline int is_master(struct dlm_rsb *r)
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index a677b2a5eed4..1dc72105ab12 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -197,13 +197,24 @@ static int do_uevent(struct dlm_ls *ls, int in)
        else
                kobject_uevent(&ls->ls_kobj, KOBJ_OFFLINE);
+        log_debug(ls, "%s the lockspace group...", in ? "joining" : "leaving");
+        /* dlm_controld will see the uevent, do the necessary group management
+           and then write to sysfs to wake us */
        error = wait_event_interruptible(ls->ls_uevent_wait,
                        test_and_clear_bit(LSFL_UEVENT_WAIT, &ls->ls_flags));
+        log_debug(ls, "group event done %d %d", error, ls->ls_uevent_result);
        if (error)
                goto out;
        error = ls->ls_uevent_result;
 out:
+        if (error)
+                log_error(ls, "group %s failed %d %d", in ? "join" : "leave",
+                          error, ls->ls_uevent_result);
        return error;
 }
@@ -234,8 +245,13 @@ static int dlm_scand(void *data)
        struct dlm_ls *ls;
        while (!kthread_should_stop()) {
-                list_for_each_entry(ls, &lslist, ls_list)
+                list_for_each_entry(ls, &lslist, ls_list) {
-                        dlm_scan_rsbs(ls);
+                        if (dlm_lock_recovery_try(ls)) {
+                                dlm_scan_rsbs(ls);
+                                dlm_scan_timeout(ls);
+                                dlm_unlock_recovery(ls);
+                        }
+                }
                schedule_timeout_interruptible(dlm_config.ci_scan_secs * HZ);
        }
        return 0;
@@ -395,6 +411,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
 {
        struct dlm_ls *ls;
        int i, size, error = -ENOMEM;
+        int do_unreg = 0;
        if (namelen > DLM_LOCKSPACE_LEN)
                return -EINVAL;
@@ -417,11 +434,22 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
                goto out;
        memcpy(ls->ls_name, name, namelen);
        ls->ls_namelen = namelen;
-        ls->ls_exflags = flags;
        ls->ls_lvblen = lvblen;
        ls->ls_count = 0;
        ls->ls_flags = 0;
+        if (flags & DLM_LSFL_TIMEWARN)
+                set_bit(LSFL_TIMEWARN, &ls->ls_flags);
+        if (flags & DLM_LSFL_FS)
+                ls->ls_allocation = GFP_NOFS;
+        else
+                ls->ls_allocation = GFP_KERNEL;
+        /* ls_exflags are forced to match among nodes, and we don't
+           need to require all nodes to have TIMEWARN or FS set */
+        ls->ls_exflags = (flags & ~(DLM_LSFL_TIMEWARN | DLM_LSFL_FS));
        size = dlm_config.ci_rsbtbl_size;
        ls->ls_rsbtbl_size = size;
@@ -461,6 +489,8 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
        mutex_init(&ls->ls_waiters_mutex);
        INIT_LIST_HEAD(&ls->ls_orphans);
        mutex_init(&ls->ls_orphans_mutex);
+        INIT_LIST_HEAD(&ls->ls_timeout);
+        mutex_init(&ls->ls_timeout_mutex);
        INIT_LIST_HEAD(&ls->ls_nodes);
        INIT_LIST_HEAD(&ls->ls_nodes_gone);
@@ -477,6 +507,8 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
        init_waitqueue_head(&ls->ls_uevent_wait);
        ls->ls_uevent_result = 0;
+        init_completion(&ls->ls_members_done);
+        ls->ls_members_result = -1;
        ls->ls_recoverd_task = NULL;
        mutex_init(&ls->ls_recoverd_active);
@@ -513,32 +545,49 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
        error = dlm_recoverd_start(ls);
        if (error) {
                log_error(ls, "can't start dlm_recoverd %d", error);
-                goto out_rcomfree;
+                goto out_delist;
        }
-        dlm_create_debug_file(ls);
        error = kobject_setup(ls);
        if (error)
-                goto out_del;
+                goto out_stop;
        error = kobject_register(&ls->ls_kobj);
        if (error)
-                goto out_del;
+                goto out_stop;
+        /* let kobject handle freeing of ls if there's an error */
+        do_unreg = 1;
+        /* This uevent triggers dlm_controld in userspace to add us to the
+           group of nodes that are members of this lockspace (managed by the
+           cluster infrastructure.)  Once it's done that, it tells us who the
+           current lockspace members are (via configfs) and then tells the
+           lockspace to start running (via sysfs) in dlm_ls_start(). */
        error = do_uevent(ls, 1);
        if (error)
-                goto out_unreg;
+                goto out_stop;
+        wait_for_completion(&ls->ls_members_done);
+        error = ls->ls_members_result;
+        if (error)
+                goto out_members;
+        dlm_create_debug_file(ls);
+        log_debug(ls, "join complete");
        *lockspace = ls;
        return 0;
- out_unreg:
+ out_members:
-        kobject_unregister(&ls->ls_kobj);
+        do_uevent(ls, 0);
- out_del:
+        dlm_clear_members(ls);
-        dlm_delete_debug_file(ls);
+        kfree(ls->ls_node_array);
+ out_stop:
        dlm_recoverd_stop(ls);
- out_rcomfree:
+ out_delist:
        spin_lock(&lslist_lock);
        list_del(&ls->ls_list);
        spin_unlock(&lslist_lock);
@@ -550,7 +599,10 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
 out_rsbfree:
        kfree(ls->ls_rsbtbl);
 out_lsfree:
-        kfree(ls);
+        if (do_unreg)
+                kobject_unregister(&ls->ls_kobj);
+        else
+                kfree(ls);
 out:
        module_put(THIS_MODULE);
        return error;
@@ -570,6 +622,8 @@ int dlm_new_lockspace(char *name, int namelen, void **lockspace,
        error = new_lockspace(name, namelen, lockspace, flags, lvblen);
        if (!error)
                ls_count++;
+        else if (!ls_count)
+                threads_stop();
 out:
        mutex_unlock(&ls_lock);
        return error;
@@ -696,7 +750,7 @@ static int release_lockspace(struct dlm_ls *ls, int force)
        dlm_clear_members_gone(ls);
        kfree(ls->ls_node_array);
        kobject_unregister(&ls->ls_kobj);
-        /* The ls structure will be freed when the kobject is done with */
+        /* The ls structure will be freed when the kobject is done with */
        mutex_lock(&ls_lock);
        ls_count--;
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 27970a58d29b..0553a6158dcb 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -260,7 +260,7 @@ static int nodeid_to_addr(int nodeid, struct sockaddr *retaddr)
 static void lowcomms_data_ready(struct sock *sk, int count_unused)
 {
        struct connection *con = sock2con(sk);
-        if (!test_and_set_bit(CF_READ_PENDING, &con->flags))
+        if (con && !test_and_set_bit(CF_READ_PENDING, &con->flags))
                queue_work(recv_workqueue, &con->rwork);
 }
@@ -268,7 +268,7 @@ static void lowcomms_write_space(struct sock *sk)
 {
        struct connection *con = sock2con(sk);
-        if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags))
+        if (con && !test_and_set_bit(CF_WRITE_PENDING, &con->flags))
                queue_work(send_workqueue, &con->swork);
 }
@@ -720,11 +720,17 @@ static int tcp_accept_from_sock(struct connection *con)
                        INIT_WORK(&othercon->rwork, process_recv_sockets);
                        set_bit(CF_IS_OTHERCON, &othercon->flags);
                        newcon->othercon = othercon;
+                        othercon->sock = newsock;
+                        newsock->sk->sk_user_data = othercon;
+                        add_sock(newsock, othercon);
+                        addcon = othercon;
+                }
+                else {
+                        printk("Extra connection from node %d attempted\n", nodeid);
+                        result = -EAGAIN;
+                        mutex_unlock(&newcon->sock_mutex);
+                        goto accept_err;
                }
-                othercon->sock = newsock;
-                newsock->sk->sk_user_data = othercon;
-                add_sock(newsock, othercon);
-                addcon = othercon;
        }
        else {
                newsock->sk->sk_user_data = newcon;
@@ -1400,8 +1406,11 @@ void dlm_lowcomms_stop(void)
        down(&connections_lock);
        for (i = 0; i <= max_nodeid; i++) {
                con = __nodeid2con(i, 0);
-                if (con)
+                if (con) {
                        con->flags |= 0xFF;
+                        if (con->sock)
+                                con->sock->sk->sk_user_data = NULL;
+                }
        }
        up(&connections_lock);
diff --git a/fs/dlm/main.c b/fs/dlm/main.c
index 162fbae58fe5..eca2907f2386 100644
--- a/fs/dlm/main.c
+++ b/fs/dlm/main.c
@@ -2,7 +2,7 @@
 *******************************************************************************
 **
 **  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
-**  Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -25,6 +25,8 @@ void dlm_unregister_debugfs(void);
 static inline int dlm_register_debugfs(void) { return 0; }
 static inline void dlm_unregister_debugfs(void) { }
 #endif
+int dlm_netlink_init(void);
+void dlm_netlink_exit(void);
 static int __init init_dlm(void)
 {
@@ -50,10 +52,16 @@ static int __init init_dlm(void)
        if (error)
                goto out_debug;
+        error = dlm_netlink_init();
+        if (error)
+                goto out_user;
        printk("DLM (built %s %s) installed\n", __DATE__, __TIME__);
        return 0;
+ out_user:
+        dlm_user_exit();
 out_debug:
        dlm_unregister_debugfs();
 out_config:
@@ -68,6 +76,7 @@ static int __init init_dlm(void)
 static void __exit exit_dlm(void)
 {
+        dlm_netlink_exit();
        dlm_user_exit();
        dlm_config_exit();
        dlm_memory_exit();
diff --git a/fs/dlm/member.c b/fs/dlm/member.c
index 85e2897bd740..073599dced2a 100644
--- a/fs/dlm/member.c
+++ b/fs/dlm/member.c
@@ -1,7 +1,7 @@
 /******************************************************************************
 *******************************************************************************
 **
-**  Copyright (C) 2005 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2005-2007 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -233,6 +233,12 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out)
        *neg_out = neg;
        error = ping_members(ls);
+        if (!error || error == -EPROTO) {
+                /* new_lockspace() may be waiting to know if the config
+                   is good or bad */
+                ls->ls_members_result = error;
+                complete(&ls->ls_members_done);
+        }
        if (error)
                goto out;
@@ -284,6 +290,9 @@ int dlm_ls_stop(struct dlm_ls *ls)
        dlm_recoverd_suspend(ls);
        ls->ls_recover_status = 0;
        dlm_recoverd_resume(ls);
+        if (!ls->ls_recover_begin)
+                ls->ls_recover_begin = jiffies;
        return 0;
 }
diff --git a/fs/dlm/netlink.c b/fs/dlm/netlink.c
new file mode 100644
index 000000000000..863b87d0dc71
--- /dev/null
+++ b/fs/dlm/netlink.c
@@ -0,0 +1,153 @@
+/*
+ * Copyright (C) 2007 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+#include <net/genetlink.h>
+#include <linux/dlm.h>
+#include <linux/dlm_netlink.h>
+#include "dlm_internal.h"
+/* Sequence number passed to genlmsg_put() for each outgoing message;
+   post-incremented once per message in prepare_data(). */
+static uint32_t dlm_nl_seqnum;
+/* Netlink pid recorded by user_cmd() from the last DLM_CMD_HELLO sender;
+   send_data() unicasts every message to this pid. */
+static uint32_t listener_nlpid;
+/* Generic netlink family used for all messages built in this file.
+   NOTE(review): GENL_ID_GENERATE presumably asks the genetlink core to
+   assign the family id at registration time -- confirm. */
+static struct genl_family family = {
+        .id             = GENL_ID_GENERATE,
+        .name           = DLM_GENL_NAME,
+        .version        = DLM_GENL_VERSION,
+};
+/* Allocate a generic netlink message with genlmsg_new(size, GFP_KERNEL)
+   and write the message headers for command "cmd" into it.
+   On success returns 0 and stores the new skb in *skbp.
+   Returns -ENOMEM if the skb cannot be allocated, or -EINVAL (after
+   freeing the skb) if genlmsg_put() fails; *skbp is not written on error. */
+static int prepare_data(u8 cmd, struct sk_buff **skbp, size_t size)
+{
+        struct sk_buff *skb;
+        void *data;
+        skb = genlmsg_new(size, GFP_KERNEL);
+        if (!skb)
+                return -ENOMEM;
+        /* add the message headers */
+        /* each message consumes one value of the file-wide sequence counter */
+        data = genlmsg_put(skb, 0, dlm_nl_seqnum++, &family, 0, cmd);
+        if (!data) {
+                nlmsg_free(skb);
+                return -EINVAL;
+        }
+        *skbp = skb;
+        return 0;
+}
+/* Reserve room in skb for one DLM_TYPE_LOCK attribute whose payload is a
+   struct dlm_lock_data.  Returns a pointer to that payload for the caller
+   to fill in, or NULL if nla_reserve() fails. */
+static struct dlm_lock_data *mk_data(struct sk_buff *skb)
+{
+        struct nlattr *ret;
+        ret = nla_reserve(skb, DLM_TYPE_LOCK, sizeof(struct dlm_lock_data));
+        if (!ret)
+                return NULL;
+        return nla_data(ret);
+}
+/* Finalize the message held in skb and unicast it to listener_nlpid.
+   If genlmsg_end() fails the skb is freed here and that error is returned;
+   otherwise the result of genlmsg_unicast() is returned.
+   NOTE(review): the skb is not freed here after genlmsg_unicast();
+   presumably the unicast call takes ownership -- confirm. */
+static int send_data(struct sk_buff *skb)
+{
+        /* skb->data is treated as the netlink header; step past it and the
+           genetlink header to get the pointer handed to genlmsg_end() */
+        struct genlmsghdr *genlhdr = nlmsg_data((struct nlmsghdr *)skb->data);
+        void *data = genlmsg_data(genlhdr);
+        int rv;
+        rv = genlmsg_end(skb, data);
+        if (rv < 0) {
+                nlmsg_free(skb);
+                return rv;
+        }
+        return genlmsg_unicast(skb, listener_nlpid);
+}
+/* .doit handler for DLM_CMD_HELLO (see dlm_nl_ops): remember the sender's
+   netlink pid in listener_nlpid so that send_data() unicasts to it.
+   The skb argument is not used.  Always returns 0. */
+static int user_cmd(struct sk_buff *skb, struct genl_info *info)
+{
+        listener_nlpid = info->snd_pid;
+        printk("user_cmd nlpid %u\n", listener_nlpid);
+        return 0;
+}
+/* The only operation registered on the family: DLM_CMD_HELLO, handled by
+   user_cmd(). */
+static struct genl_ops dlm_nl_ops = {
+        .cmd            = DLM_CMD_HELLO,
+        .doit           = user_cmd,
+};
+/* Register the DLM generic netlink family and then its DLM_CMD_HELLO
+   operation.  Returns 0 on success.  If registering the family fails its
+   error is returned directly; if registering the ops fails the family is
+   unregistered again before the error is returned. */
+int dlm_netlink_init(void)
+{
+        int rv;
+        rv = genl_register_family(&family);
+        if (rv)
+                return rv;
+        rv = genl_register_ops(&family, &dlm_nl_ops);
+        if (rv < 0)
+                goto err;
+        return 0;
+ err:
+        /* undo the family registration so nothing is left half set up */
+        genl_unregister_family(&family);
+        return rv;
+}
+/* Undo dlm_netlink_init(): unregister the ops first, then the family
+   (reverse of the registration order). */
+void dlm_netlink_exit(void)
+{
+        genl_unregister_ops(&family, &dlm_nl_ops);
+        genl_unregister_family(&family);
+}
+/* Fill *data with a snapshot of lkb for a netlink message.  The struct is
+   zeroed first, so any field not set below is reported as 0: xid when
+   lkb_astparam is 0, and the lockspace id / resource name fields when the
+   lkb has no resource. */
+static void fill_data(struct dlm_lock_data *data, struct dlm_lkb *lkb)
+{
+        struct dlm_rsb *r = lkb->lkb_resource;
+        /* NOTE(review): lkb_astparam is cast to struct dlm_user_args
+           unconditionally and dereferenced below when non-zero; this
+           presumably assumes the lkb belongs to a userspace lock --
+           confirm that callers never pass other lkbs. */
+        struct dlm_user_args *ua = (struct dlm_user_args *) lkb->lkb_astparam;
+        memset(data, 0, sizeof(struct dlm_lock_data));
+        data->version = DLM_LOCK_DATA_VERSION;
+        data->nodeid = lkb->lkb_nodeid;
+        data->ownpid = lkb->lkb_ownpid;
+        data->id = lkb->lkb_id;
+        data->remid = lkb->lkb_remid;
+        data->status = lkb->lkb_status;
+        data->grmode = lkb->lkb_grmode;
+        data->rqmode = lkb->lkb_rqmode;
+        data->timestamp = lkb->lkb_timestamp;
+        if (ua)
+                data->xid = ua->xid;
+        if (r) {
+                data->lockspace_id = r->res_ls->ls_global_id;
+                data->resource_namelen = r->res_length;
+                /* NOTE(review): copies res_length bytes with no bound check
+                   here; assumes resource_name is at least that large --
+                   confirm against struct dlm_lock_data. */
+                memcpy(data->resource_name, r->res_name, r->res_length);
+        }
+}
+/* Build a DLM_CMD_TIMEOUT message carrying one struct dlm_lock_data that
+   describes lkb, and send it to the registered listener.
+   Best effort: nothing is returned, and a failure to allocate the message,
+   reserve the attribute, or send it is dropped without being logged. */
+void dlm_timeout_warn(struct dlm_lkb *lkb)
+{
+        struct dlm_lock_data *data;
+        struct sk_buff *send_skb;
+        size_t size;
+        int rv;
+        size = nla_total_size(sizeof(struct dlm_lock_data)) +
+               nla_total_size(0); /* why this? */
+        rv = prepare_data(DLM_CMD_TIMEOUT, &send_skb, size);
+        if (rv < 0)
+                return;
+        data = mk_data(send_skb);
+        if (!data) {
+                /* prepare_data() succeeded, so the skb must be freed here */
+                nlmsg_free(send_skb);
+                return;
+        }
+        fill_data(data, lkb);
+        /* return value deliberately ignored; send_data() frees the skb
+           itself if genlmsg_end() fails */
+        send_data(send_skb);
+}
diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c
index 6bfbd6153809..e3a1527cbdbe 100644
--- a/fs/dlm/rcom.c
+++ b/fs/dlm/rcom.c
@@ -38,7 +38,7 @@ static int create_rcom(struct dlm_ls *ls, int to_nodeid, int type, int len,
        char *mb;
        int mb_len = sizeof(struct dlm_rcom) + len;
-        mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, GFP_KERNEL, &mb);
+        mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, ls->ls_allocation, &mb);
        if (!mh) {
                log_print("create_rcom to %d type %d len %d ENOBUFS",
                          to_nodeid, type, len);
@@ -90,7 +90,7 @@ static int check_config(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid)
                log_error(ls, "version mismatch: %x nodeid %d: %x",
                          DLM_HEADER_MAJOR | DLM_HEADER_MINOR, nodeid,
                          rc->rc_header.h_version);
-                return -EINVAL;
+                return -EPROTO;
        }
        if (rf->rf_lvblen != ls->ls_lvblen ||
@@ -98,7 +98,7 @@ static int check_config(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid)
                log_error(ls, "config mismatch: %d,%x nodeid %d: %d,%x",
                          ls->ls_lvblen, ls->ls_exflags,
                          nodeid, rf->rf_lvblen, rf->rf_lsflags);
-                return -EINVAL;
+                return -EPROTO;
        }
        return 0;
 }
@@ -386,7 +386,8 @@ static void receive_rcom_lock_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in)
        dlm_recover_process_copy(ls, rc_in);
 }
-static int send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in)
+static int send_ls_not_ready(struct dlm_ls *ls, int nodeid,
+                             struct dlm_rcom *rc_in)
 {
        struct dlm_rcom *rc;
        struct rcom_config *rf;
@@ -394,7 +395,7 @@ static int send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in)
        char *mb;
        int mb_len = sizeof(struct dlm_rcom) + sizeof(struct rcom_config);
-        mh = dlm_lowcomms_get_buffer(nodeid, mb_len, GFP_KERNEL, &mb);
+        mh = dlm_lowcomms_get_buffer(nodeid, mb_len, ls->ls_allocation, &mb);
        if (!mh)
                return -ENOBUFS;
        memset(mb, 0, mb_len);
@@ -464,7 +465,7 @@ void dlm_receive_rcom(struct dlm_header *hd, int nodeid)
                log_print("lockspace %x from %d type %x not found",
                          hd->h_lockspace, nodeid, rc->rc_type);
                if (rc->rc_type == DLM_RCOM_STATUS)
-                        send_ls_not_ready(nodeid, rc);
+                        send_ls_not_ready(ls, nodeid, rc);
                return;
        }
diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c
index 3cb636d60249..66575997861c 100644
--- a/fs/dlm/recoverd.c
+++ b/fs/dlm/recoverd.c
@@ -2,7 +2,7 @@
 *******************************************************************************
 **
 **  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
-**  Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -190,6 +190,8 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
        dlm_clear_members_gone(ls);
+        dlm_adjust_timeouts(ls);
        error = enable_locking(ls, rv->seq);
        if (error) {
                log_debug(ls, "enable_locking failed %d", error);
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index b0201ec325a7..6438941ab1f8 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -33,16 +33,17 @@ static const struct file_operations device_fops;
 struct dlm_lock_params32 {
        __u8 mode;
        __u8 namelen;
-        __u16 flags;
+        __u16 unused;
+        __u32 flags;
        __u32 lkid;
        __u32 parent;
+        __u64 xid;
+        __u64 timeout;
        __u32 castparam;
        __u32 castaddr;
        __u32 bastparam;
        __u32 bastaddr;
        __u32 lksb;
        char lvb[DLM_USER_LVB_LEN];
        char name[0];
 };
@@ -68,6 +69,7 @@ struct dlm_lksb32 {
 };
 struct dlm_lock_result32 {
+        __u32 version[3];
        __u32 length;
        __u32 user_astaddr;
        __u32 user_astparam;
@@ -102,6 +104,8 @@ static void compat_input(struct dlm_write_request *kb,
                kb->i.lock.flags = kb32->i.lock.flags;
                kb->i.lock.lkid = kb32->i.lock.lkid;
                kb->i.lock.parent = kb32->i.lock.parent;
+                kb->i.lock.xid = kb32->i.lock.xid;
+                kb->i.lock.timeout = kb32->i.lock.timeout;
                kb->i.lock.castparam = (void *)(long)kb32->i.lock.castparam;
                kb->i.lock.castaddr = (void *)(long)kb32->i.lock.castaddr;
                kb->i.lock.bastparam = (void *)(long)kb32->i.lock.bastparam;
@@ -115,6 +119,10 @@ static void compat_input(struct dlm_write_request *kb,
 static void compat_output(struct dlm_lock_result *res,
                          struct dlm_lock_result32 *res32)
 {
+        res32->version[0] = res->version[0];
+        res32->version[1] = res->version[1];
+        res32->version[2] = res->version[2];
        res32->user_astaddr = (__u32)(long)res->user_astaddr;
        res32->user_astparam = (__u32)(long)res->user_astparam;
        res32->user_lksb = (__u32)(long)res->user_lksb;
@@ -130,6 +138,36 @@ static void compat_output(struct dlm_lock_result *res,
 }
 #endif
+/* Figure out if this lock is at the end of its life and no longer
+   available for the application to use.  The lkb still exists until
+   the final ast is read.  A lock becomes EOL in three situations:
+     1. a noqueue request fails with EAGAIN
+     2. an unlock completes with EUNLOCK
+     3. a cancel of a waiting request completes with ECANCEL/EDEADLK
+        (the code below treats ETIMEDOUT the same way)
+   Cases 1 and 3 only count while lkb_grmode is DLM_LOCK_IV, and case 1
+   additionally requires type == AST_COMP.
+   An EOL lock needs to be removed from the process's list of locks.
+   And we can't allow any new operation on an EOL lock.  This is
+   not related to the lifetime of the lkb struct which is managed
+   entirely by refcount.
+   Returns 1 if the lkb is EOL for the given sb_status/type, 0 otherwise
+   (including for any sb_status not listed in the switch). */
+static int lkb_is_endoflife(struct dlm_lkb *lkb, int sb_status, int type)
+{
+        switch (sb_status) {
+        case -DLM_EUNLOCK:
+                return 1;
+        case -DLM_ECANCEL:
+        case -ETIMEDOUT:
+        case -EDEADLK:
+                if (lkb->lkb_grmode == DLM_LOCK_IV)
+                        return 1;
+                break;
+        case -EAGAIN:
+                if (type == AST_COMP && lkb->lkb_grmode == DLM_LOCK_IV)
+                        return 1;
+                break;
+        }
+        return 0;
+}
 /* we could possibly check if the cancel of an orphan has resulted in the lkb
   being removed and then remove that lkb from the orphans list and free it */
@@ -176,25 +214,7 @@ void dlm_user_add_ast(struct dlm_lkb *lkb, int type)
                log_debug(ls, "ast overlap %x status %x %x",
                          lkb->lkb_id, ua->lksb.sb_status, lkb->lkb_flags);
-        /* Figure out if this lock is at the end of its life and no longer
+        eol = lkb_is_endoflife(lkb, ua->lksb.sb_status, type);
-           available for the application to use.  The lkb still exists until
-           the final ast is read.  A lock becomes EOL in three situations:
-             1. a noqueue request fails with EAGAIN
-             2. an unlock completes with EUNLOCK
-             3. a cancel of a waiting request completes with ECANCEL
-           An EOL lock needs to be removed from the process's list of locks.
-           And we can't allow any new operation on an EOL lock.  This is
-           not related to the lifetime of the lkb struct which is managed
-           entirely by refcount. */
-        if (type == AST_COMP &&
-            lkb->lkb_grmode == DLM_LOCK_IV &&
-            ua->lksb.sb_status == -EAGAIN)
-                eol = 1;
-        else if (ua->lksb.sb_status == -DLM_EUNLOCK ||
-            (ua->lksb.sb_status == -DLM_ECANCEL &&
-             lkb->lkb_grmode == DLM_LOCK_IV))
-                eol = 1;
        if (eol) {
                lkb->lkb_ast_type &= ~AST_BAST;
                lkb->lkb_flags |= DLM_IFL_ENDOFLIFE;
@@ -252,16 +272,18 @@ static int device_user_lock(struct dlm_user_proc *proc,
        ua->castaddr = params->castaddr;
        ua->bastparam = params->bastparam;
        ua->bastaddr = params->bastaddr;
+        ua->xid = params->xid;
        if (params->flags & DLM_LKF_CONVERT)
                error = dlm_user_convert(ls, ua,
                                         params->mode, params->flags,
-                                         params->lkid, params->lvb);
+                                         params->lkid, params->lvb,
+                                         (unsigned long) params->timeout);
        else {
                error = dlm_user_request(ls, ua,
                                         params->mode, params->flags,
                                         params->name, params->namelen,
-                                         params->parent);
+                                         (unsigned long) params->timeout);
                if (!error)
                        error = ua->lksb.sb_lkid;
        }
@@ -299,6 +321,22 @@ static int device_user_unlock(struct dlm_user_proc *proc,
        return error;
 }
+/* Handle a deadlock request written to a lockspace device: look up the
+   lockspace named by proc->lockspace and pass params->flags and
+   params->lkid to dlm_user_deadlock().
+   Returns -ENOENT if the lockspace is not found, otherwise whatever
+   dlm_user_deadlock() returns.  dlm_put_lockspace() is called on the
+   lockspace before returning in the found case. */
+static int device_user_deadlock(struct dlm_user_proc *proc,
+                                struct dlm_lock_params *params)
+{
+        struct dlm_ls *ls;
+        int error;
+        ls = dlm_find_lockspace_local(proc->lockspace);
+        if (!ls)
+                return -ENOENT;
+        error = dlm_user_deadlock(ls, params->flags, params->lkid);
+        dlm_put_lockspace(ls);
+        return error;
+}
 static int create_misc_device(struct dlm_ls *ls, char *name)
 {
        int error, len;
@@ -348,7 +386,7 @@ static int device_create_lockspace(struct dlm_lspace_params *params)
                return -EPERM;
        error = dlm_new_lockspace(params->name, strlen(params->name),
-                                  &lockspace, 0, DLM_USER_LVB_LEN);
+                                  &lockspace, params->flags, DLM_USER_LVB_LEN);
        if (error)
                return error;
@@ -524,6 +562,14 @@ static ssize_t device_write(struct file *file, const char __user *buf,
                error = device_user_unlock(proc, &kbuf->i.lock);
                break;
+        case DLM_USER_DEADLOCK:
+                if (!proc) {
+                        log_print("no locking on control device");
+                        goto out_sig;
+                }
+                error = device_user_deadlock(proc, &kbuf->i.lock);
+                break;
        case DLM_USER_CREATE_LOCKSPACE:
                if (proc) {
                        log_print("create/remove only on control device");
@@ -641,6 +687,9 @@ static int copy_result_to_user(struct dlm_user_args *ua, int compat, int type,
        int struct_len;
        memset(&result, 0, sizeof(struct dlm_lock_result));
+        result.version[0] = DLM_DEVICE_VERSION_MAJOR;
+        result.version[1] = DLM_DEVICE_VERSION_MINOR;
+        result.version[2] = DLM_DEVICE_VERSION_PATCH;
        memcpy(&result.lksb, &ua->lksb, sizeof(struct dlm_lksb));
        result.user_lksb = ua->user_lksb;
@@ -699,6 +748,20 @@ static int copy_result_to_user(struct dlm_user_args *ua, int compat, int type,
        return error;
 }
+/* Copy the device interface version (major, minor, patch) to the user
+   buffer as a struct dlm_device_version; the struct is zeroed first.
+   Returns sizeof(struct dlm_device_version) on success or -EFAULT if
+   copy_to_user() fails.  "count" is not used here: the caller in
+   device_read() only calls this when count equals
+   sizeof(struct dlm_device_version). */
+static int copy_version_to_user(char __user *buf, size_t count)
+{
+        struct dlm_device_version ver;
+        memset(&ver, 0, sizeof(struct dlm_device_version));
+        ver.version[0] = DLM_DEVICE_VERSION_MAJOR;
+        ver.version[1] = DLM_DEVICE_VERSION_MINOR;
+        ver.version[2] = DLM_DEVICE_VERSION_PATCH;
+        if (copy_to_user(buf, &ver, sizeof(struct dlm_device_version)))
+                return -EFAULT;
+        return sizeof(struct dlm_device_version);
+}
 /* a read returns a single ast described in a struct dlm_lock_result */
 static ssize_t device_read(struct file *file, char __user *buf, size_t count,
@@ -710,6 +773,16 @@ static ssize_t device_read(struct file *file, char __user *buf, size_t count,
        DECLARE_WAITQUEUE(wait, current);
        int error, type=0, bmode=0, removed = 0;
+        if (count == sizeof(struct dlm_device_version)) {
+                error = copy_version_to_user(buf, count);
+                return error;
+        }
+        if (!proc) {
+                log_print("non-version read from control device %zu", count);
+                return -EINVAL;
+        }
 #ifdef CONFIG_COMPAT
        if (count < sizeof(struct dlm_lock_result32))
 #else
@@ -747,11 +820,6 @@ static ssize_t device_read(struct file *file, char __user *buf, size_t count,
                }
        }
-        if (list_empty(&proc->asts)) {
-                spin_unlock(&proc->asts_spin);
-                return -EAGAIN;
-        }
        /* there may be both completion and blocking asts to return for
           the lkb, don't remove lkb from asts list unless no asts remain */
@@ -823,6 +891,7 @@ static const struct file_operations device_fops = {
 static const struct file_operations ctl_device_fops = {
        .open    = ctl_device_open,
        .release = ctl_device_close,
+        .read    = device_read,
        .write   = device_write,
        .owner   = THIS_MODULE,
 };
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 59288d817078..94f456fe4d9b 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -338,16 +338,17 @@ static int ecryptfs_fasync(int fd, struct file *file, int flag)
        return rc;
 }
-static ssize_t ecryptfs_sendfile(struct file *file, loff_t * ppos,
+static ssize_t ecryptfs_splice_read(struct file *file, loff_t * ppos,
-                                 size_t count, read_actor_t actor, void *target)
+                                    struct pipe_inode_info *pipe, size_t count,
+                                    unsigned int flags)
 {
        struct file *lower_file = NULL;
        int rc = -EINVAL;
        lower_file = ecryptfs_file_to_lower(file);
-        if (lower_file->f_op && lower_file->f_op->sendfile)
+        if (lower_file->f_op && lower_file->f_op->splice_read)
-                rc = lower_file->f_op->sendfile(lower_file, ppos, count,
+                rc = lower_file->f_op->splice_read(lower_file, ppos, pipe,
-                                                actor, target);
+                                                count, flags);
        return rc;
 }
@@ -364,7 +365,7 @@ const struct file_operations ecryptfs_dir_fops = {
        .release = ecryptfs_release,
        .fsync = ecryptfs_fsync,
        .fasync = ecryptfs_fasync,
-        .sendfile = ecryptfs_sendfile,
+        .splice_read = ecryptfs_splice_read,
 };
 const struct file_operations ecryptfs_main_fops = {
@@ -381,7 +382,7 @@ const struct file_operations ecryptfs_main_fops = {
        .release = ecryptfs_release,
        .fsync = ecryptfs_fsync,
        .fasync = ecryptfs_fasync,
-        .sendfile = ecryptfs_sendfile,
+        .splice_read = ecryptfs_splice_read,
 };
 static int
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index 566d4e2d3852..04afeecaaef3 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -53,7 +53,6 @@ const struct file_operations ext2_file_operations = {
        .open           = generic_file_open,
        .release        = ext2_release_file,
        .fsync          = ext2_sync_file,
-        .sendfile       = generic_file_sendfile,
        .splice_read    = generic_file_splice_read,
        .splice_write   = generic_file_splice_write,
 };
@@ -71,7 +70,6 @@ const struct file_operations ext2_xip_file_operations = {
        .open           = generic_file_open,
        .release        = ext2_release_file,
        .fsync          = ext2_sync_file,
-        .sendfile       = xip_file_sendfile,
 };
 #endif
diff --git a/fs/ext3/file.c b/fs/ext3/file.c
index 1e6f13864536..acc4913d3019 100644
--- a/fs/ext3/file.c
+++ b/fs/ext3/file.c
@@ -120,7 +120,6 @@ const struct file_operations ext3_file_operations = {
        .open           = generic_file_open,
        .release        = ext3_release_file,
        .fsync          = ext3_sync_file,
-        .sendfile       = generic_file_sendfile,
        .splice_read    = generic_file_splice_read,
        .splice_write   = generic_file_splice_write,
 };
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 3c6c1fd2be90..d4c8186aed64 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -120,7 +120,6 @@ const struct file_operations ext4_file_operations = {
        .open           = generic_file_open,
        .release        = ext4_release_file,
        .fsync          = ext4_sync_file,
-        .sendfile       = generic_file_sendfile,
        .splice_read    = generic_file_splice_read,
        .splice_write   = generic_file_splice_write,
 };
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 55d3c7461c5b..69a83b59dce8 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -134,7 +134,7 @@ const struct file_operations fat_file_operations = {
        .release        = fat_file_release,
        .ioctl          = fat_generic_ioctl,
        .fsync          = file_fsync,
-        .sendfile       = generic_file_sendfile,
+        .splice_read    = generic_file_splice_read,
 };
 static int fat_cont_expand(struct inode *inode, loff_t size)
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index adf7995232b8..f79de7c8cdfa 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -802,7 +802,7 @@ static const struct file_operations fuse_file_operations = {
        .release        = fuse_release,
        .fsync          = fuse_fsync,
        .lock           = fuse_file_lock,
-        .sendfile       = generic_file_sendfile,
+        .splice_read    = generic_file_splice_read,
 };
 static const struct file_operations fuse_direct_io_file_operations = {
@@ -814,7 +814,7 @@ static const struct file_operations fuse_direct_io_file_operations = {
        .release        = fuse_release,
        .fsync          = fuse_fsync,
        .lock           = fuse_file_lock,
-        /* no mmap and sendfile */
+        /* no mmap and splice_read */
 };
 static const struct address_space_operations fuse_file_aops  = {
diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile
index e3f1ada643ac..04ad0caebedb 100644
--- a/fs/gfs2/Makefile
+++ b/fs/gfs2/Makefile
@@ -1,7 +1,7 @@
 obj-$(CONFIG_GFS2_FS) += gfs2.o
 gfs2-y := acl.o bmap.o daemon.o dir.o eaops.o eattr.o glock.o \
        glops.o inode.o lm.o log.o lops.o locking.o main.o meta_io.o \
-        mount.o ondisk.o ops_address.o ops_dentry.o ops_export.o ops_file.o \
+        mount.o ops_address.o ops_dentry.o ops_export.o ops_file.o \
        ops_fstype.o ops_inode.o ops_super.o ops_vm.o quota.o \
        recovery.o rgrp.o super.o sys.o trans.o util.o
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index c53a5d2d0590..cd805a66880d 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -718,7 +718,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
        for (x = 0; x < rlist.rl_rgrps; x++) {
                struct gfs2_rgrpd *rgd;
                rgd = rlist.rl_ghs[x].gh_gl->gl_object;
-                rg_blocks += rgd->rd_ri.ri_length;
+                rg_blocks += rgd->rd_length;
        }
        error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
@@ -772,7 +772,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
                        gfs2_free_data(ip, bstart, blen);
        }
-        ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+        ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
        gfs2_dinode_out(ip, dibh->b_data);
@@ -824,7 +824,7 @@ static int do_grow(struct gfs2_inode *ip, u64 size)
                goto out_gunlock_q;
        error = gfs2_trans_begin(sdp,
-                        sdp->sd_max_height + al->al_rgd->rd_ri.ri_length +
+                        sdp->sd_max_height + al->al_rgd->rd_length +
                        RES_JDATA + RES_DINODE + RES_STATFS + RES_QUOTA, 0);
        if (error)
                goto out_ipres;
@@ -847,7 +847,7 @@ static int do_grow(struct gfs2_inode *ip, u64 size)
        }
        ip->i_di.di_size = size;
-        ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+        ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
        error = gfs2_meta_inode_buffer(ip, &dibh);
        if (error)
@@ -885,7 +885,6 @@ static int gfs2_block_truncate_page(struct address_space *mapping)
        unsigned blocksize, iblock, length, pos;
        struct buffer_head *bh;
        struct page *page;
-        void *kaddr;
        int err;
        page = grab_cache_page(mapping, index);
@@ -928,15 +927,13 @@ static int gfs2_block_truncate_page(struct address_space *mapping)
                /* Uhhuh. Read error. Complain and punt. */
                if (!buffer_uptodate(bh))
                        goto unlock;
+                err = 0;
        }
        if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip))
                gfs2_trans_add_bh(ip->i_gl, bh, 0);
-        kaddr = kmap_atomic(page, KM_USER0);
+        zero_user_page(page, offset, length, KM_USER0);
-        memset(kaddr + offset, 0, length);
-        flush_dcache_page(page);
-        kunmap_atomic(kaddr, KM_USER0);
 unlock:
        unlock_page(page);
@@ -962,7 +959,7 @@ static int trunc_start(struct gfs2_inode *ip, u64 size)
        if (gfs2_is_stuffed(ip)) {
                ip->i_di.di_size = size;
-                ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+                ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
                gfs2_trans_add_bh(ip->i_gl, dibh, 1);
                gfs2_dinode_out(ip, dibh->b_data);
                gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + size);
@@ -974,7 +971,7 @@ static int trunc_start(struct gfs2_inode *ip, u64 size)
                if (!error) {
                        ip->i_di.di_size = size;
-                        ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+                        ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
                        ip->i_di.di_flags |= GFS2_DIF_TRUNC_IN_PROG;
                        gfs2_trans_add_bh(ip->i_gl, dibh, 1);
                        gfs2_dinode_out(ip, dibh->b_data);
@@ -1044,10 +1041,10 @@ static int trunc_end(struct gfs2_inode *ip)
                ip->i_di.di_height = 0;
                ip->i_di.di_goal_meta =
                        ip->i_di.di_goal_data =
-                        ip->i_num.no_addr;
+                        ip->i_no_addr;
                gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
        }
-        ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+        ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
        ip->i_di.di_flags &= ~GFS2_DIF_TRUNC_IN_PROG;
        gfs2_trans_add_bh(ip->i_gl, dibh, 1);
diff --git a/fs/gfs2/daemon.c b/fs/gfs2/daemon.c
index 683cb5bda870..3548d9f31e0d 100644
--- a/fs/gfs2/daemon.c
+++ b/fs/gfs2/daemon.c
@@ -16,6 +16,7 @@
 #include <linux/delay.h>
 #include <linux/gfs2_ondisk.h>
 #include <linux/lm_interface.h>
+#include <linux/freezer.h>
 #include "gfs2.h"
 #include "incore.h"
@@ -49,6 +50,8 @@ int gfs2_scand(void *data)
        while (!kthread_should_stop()) {
                gfs2_scand_internal(sdp);
                t = gfs2_tune_get(sdp, gt_scand_secs) * HZ;
+                if (freezing(current))
+                        refrigerator();
                schedule_timeout_interruptible(t);
        }
@@ -74,6 +77,8 @@ int gfs2_glockd(void *data)
                wait_event_interruptible(sdp->sd_reclaim_wq,
                                         (atomic_read(&sdp->sd_reclaim_count) ||
                                         kthread_should_stop()));
+                if (freezing(current))
+                        refrigerator();
        }
        return 0;
@@ -93,6 +98,8 @@ int gfs2_recoverd(void *data)
        while (!kthread_should_stop()) {
                gfs2_check_journals(sdp);
                t = gfs2_tune_get(sdp,  gt_recoverd_secs) * HZ;
+                if (freezing(current))
+                        refrigerator();
                schedule_timeout_interruptible(t);
        }
@@ -141,6 +148,8 @@ int gfs2_logd(void *data)
                }
                t = gfs2_tune_get(sdp, gt_logd_secs) * HZ;
+                if (freezing(current))
+                        refrigerator();
                schedule_timeout_interruptible(t);
        }
@@ -191,6 +200,8 @@ int gfs2_quotad(void *data)
                gfs2_quota_scan(sdp);
                t = gfs2_tune_get(sdp, gt_quotad_secs) * HZ;
+                if (freezing(current))
+                        refrigerator();
                schedule_timeout_interruptible(t);
        }
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index a96fa07b3f3b..2beb2f401aa2 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -130,7 +130,7 @@ static int gfs2_dir_write_stuffed(struct gfs2_inode *ip, const char *buf,
        memcpy(dibh->b_data + offset + sizeof(struct gfs2_dinode), buf, size);
        if (ip->i_di.di_size < offset + size)
                ip->i_di.di_size = offset + size;
-        ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+        ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
        gfs2_dinode_out(ip, dibh->b_data);
        brelse(dibh);
@@ -228,7 +228,7 @@ out:
        if (ip->i_di.di_size < offset + copied)
                ip->i_di.di_size = offset + copied;
-        ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+        ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
        gfs2_trans_add_bh(ip->i_gl, dibh, 1);
        gfs2_dinode_out(ip, dibh->b_data);
@@ -1456,7 +1456,7 @@ int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque,
                if (dip->i_di.di_entries != g.offset) {
                        fs_warn(sdp, "Number of entries corrupt in dir %llu, "
                                "ip->i_di.di_entries (%u) != g.offset (%u)\n",
-                                (unsigned long long)dip->i_num.no_addr,
+                                (unsigned long long)dip->i_no_addr,
                                dip->i_di.di_entries,
                                g.offset);
                        error = -EIO;
@@ -1488,24 +1488,55 @@ out:
 * Returns: errno
 */
-int gfs2_dir_search(struct inode *dir, const struct qstr *name,
+struct inode *gfs2_dir_search(struct inode *dir, const struct qstr *name)
-                    struct gfs2_inum_host *inum, unsigned int *type)
 {
        struct buffer_head *bh;
        struct gfs2_dirent *dent;
+        struct inode *inode;
+        dent = gfs2_dirent_search(dir, name, gfs2_dirent_find, &bh);
+        if (dent) {
+                if (IS_ERR(dent))
+                        return ERR_PTR(PTR_ERR(dent));
+                inode = gfs2_inode_lookup(dir->i_sb, 
+                                be16_to_cpu(dent->de_type),
+                                be64_to_cpu(dent->de_inum.no_addr),
+                                be64_to_cpu(dent->de_inum.no_formal_ino));
+                brelse(bh);
+                return inode;
+        }
+        return ERR_PTR(-ENOENT);
+}
+int gfs2_dir_check(struct inode *dir, const struct qstr *name,
+                   const struct gfs2_inode *ip)
+{
+        struct buffer_head *bh;
+        struct gfs2_dirent *dent;
+        int ret = -ENOENT;
        dent = gfs2_dirent_search(dir, name, gfs2_dirent_find, &bh);
        if (dent) {
                if (IS_ERR(dent))
                        return PTR_ERR(dent);
-                if (inum)
+                if (ip) {
-                        gfs2_inum_in(inum, (char *)&dent->de_inum);
+                        if (be64_to_cpu(dent->de_inum.no_addr) != ip->i_no_addr)
-                if (type)
+                                goto out;
-                        *type = be16_to_cpu(dent->de_type);
+                        if (be64_to_cpu(dent->de_inum.no_formal_ino) !=
+                            ip->i_no_formal_ino)
+                                goto out;
+                        if (unlikely(IF2DT(ip->i_inode.i_mode) !=
+                            be16_to_cpu(dent->de_type))) {
+                                gfs2_consist_inode(GFS2_I(dir));
+                                ret = -EIO;
+                                goto out;
+                        }
+                }
+                ret = 0;
+out:
                brelse(bh);
-                return 0;
        }
-        return -ENOENT;
+        return ret;
 }
 static int dir_new_leaf(struct inode *inode, const struct qstr *name)
@@ -1565,7 +1596,7 @@ static int dir_new_leaf(struct inode *inode, const struct qstr *name)
 */
 int gfs2_dir_add(struct inode *inode, const struct qstr *name,
-                 const struct gfs2_inum_host *inum, unsigned type)
+                 const struct gfs2_inode *nip, unsigned type)
 {
        struct gfs2_inode *ip = GFS2_I(inode);
        struct buffer_head *bh;
@@ -1580,7 +1611,7 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name,
                        if (IS_ERR(dent))
                                return PTR_ERR(dent);
                        dent = gfs2_init_dirent(inode, dent, name, bh);
-                        gfs2_inum_out(inum, (char *)&dent->de_inum);
+                        gfs2_inum_out(nip, dent);
                        dent->de_type = cpu_to_be16(type);
                        if (ip->i_di.di_flags & GFS2_DIF_EXHASH) {
                                leaf = (struct gfs2_leaf *)bh->b_data;
@@ -1592,7 +1623,7 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name,
                                break;
                        gfs2_trans_add_bh(ip->i_gl, bh, 1);
                        ip->i_di.di_entries++;
-                        ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+                        ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
                        gfs2_dinode_out(ip, bh->b_data);
                        brelse(bh);
                        error = 0;
@@ -1678,7 +1709,7 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *name)
                gfs2_consist_inode(dip);
        gfs2_trans_add_bh(dip->i_gl, bh, 1);
        dip->i_di.di_entries--;
-        dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME_SEC;
+        dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME;
        gfs2_dinode_out(dip, bh->b_data);
        brelse(bh);
        mark_inode_dirty(&dip->i_inode);
@@ -1700,7 +1731,7 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *name)
 */
 int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
-                   struct gfs2_inum_host *inum, unsigned int new_type)
+                   const struct gfs2_inode *nip, unsigned int new_type)
 {
        struct buffer_head *bh;
        struct gfs2_dirent *dent;
@@ -1715,7 +1746,7 @@ int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
                return PTR_ERR(dent);
        gfs2_trans_add_bh(dip->i_gl, bh, 1);
-        gfs2_inum_out(inum, (char *)&dent->de_inum);
+        gfs2_inum_out(nip, dent);
        dent->de_type = cpu_to_be16(new_type);
        if (dip->i_di.di_flags & GFS2_DIF_EXHASH) {
@@ -1726,7 +1757,7 @@ int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
                gfs2_trans_add_bh(dip->i_gl, bh, 1);
        }
-        dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME_SEC;
+        dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME;
        gfs2_dinode_out(dip, bh->b_data);
        brelse(bh);
        return 0;
@@ -1867,7 +1898,7 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
        for (x = 0; x < rlist.rl_rgrps; x++) {
                struct gfs2_rgrpd *rgd;
                rgd = rlist.rl_ghs[x].gh_gl->gl_object;
-                rg_blocks += rgd->rd_ri.ri_length;
+                rg_blocks += rgd->rd_length;
        }
        error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
diff --git a/fs/gfs2/dir.h b/fs/gfs2/dir.h
index 48fe89046bba..8a468cac9328 100644
--- a/fs/gfs2/dir.h
+++ b/fs/gfs2/dir.h
@@ -16,15 +16,16 @@ struct inode;
 struct gfs2_inode;
 struct gfs2_inum;
-int gfs2_dir_search(struct inode *dir, const struct qstr *filename,
+struct inode *gfs2_dir_search(struct inode *dir, const struct qstr *filename);
-                    struct gfs2_inum_host *inum, unsigned int *type);
+int gfs2_dir_check(struct inode *dir, const struct qstr *filename,
+                   const struct gfs2_inode *ip);
 int gfs2_dir_add(struct inode *inode, const struct qstr *filename,
-                 const struct gfs2_inum_host *inum, unsigned int type);
+                 const struct gfs2_inode *ip, unsigned int type);
 int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *filename);
 int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque,
                  filldir_t filldir);
 int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
-                   struct gfs2_inum_host *new_inum, unsigned int new_type);
+                   const struct gfs2_inode *nip, unsigned int new_type);
 int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip);
diff --git a/fs/gfs2/eattr.c b/fs/gfs2/eattr.c
index 5b83ca6acab1..2a7435b5c4dc 100644
--- a/fs/gfs2/eattr.c
+++ b/fs/gfs2/eattr.c
@@ -254,7 +254,7 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
        if (error)
                return error;
-        error = gfs2_trans_begin(sdp, rgd->rd_ri.ri_length + RES_DINODE +
+        error = gfs2_trans_begin(sdp, rgd->rd_length + RES_DINODE +
                                 RES_EATTR + RES_STATFS + RES_QUOTA, blks);
        if (error)
                goto out_gunlock;
@@ -300,7 +300,7 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
        error = gfs2_meta_inode_buffer(ip, &dibh);
        if (!error) {
-                ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+                ip->i_inode.i_ctime = CURRENT_TIME;
                gfs2_trans_add_bh(ip->i_gl, dibh, 1);
                gfs2_dinode_out(ip, dibh->b_data);
                brelse(dibh);
@@ -700,7 +700,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
                goto out_gunlock_q;
        error = gfs2_trans_begin(GFS2_SB(&ip->i_inode),
-                                 blks + al->al_rgd->rd_ri.ri_length +
+                                 blks + al->al_rgd->rd_length +
                                 RES_DINODE + RES_STATFS + RES_QUOTA, 0);
        if (error)
                goto out_ipres;
@@ -717,7 +717,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
                                            (er->er_mode & S_IFMT));
                        ip->i_inode.i_mode = er->er_mode;
                }
-                ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+                ip->i_inode.i_ctime = CURRENT_TIME;
                gfs2_trans_add_bh(ip->i_gl, dibh, 1);
                gfs2_dinode_out(ip, dibh->b_data);
                brelse(dibh);
@@ -852,7 +852,7 @@ static int ea_set_simple_noalloc(struct gfs2_inode *ip, struct buffer_head *bh,
                        (ip->i_inode.i_mode & S_IFMT) == (er->er_mode & S_IFMT));
                ip->i_inode.i_mode = er->er_mode;
        }
-        ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+        ip->i_inode.i_ctime = CURRENT_TIME;
        gfs2_trans_add_bh(ip->i_gl, dibh, 1);
        gfs2_dinode_out(ip, dibh->b_data);
        brelse(dibh);
@@ -1133,7 +1133,7 @@ static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el)
        error = gfs2_meta_inode_buffer(ip, &dibh);
        if (!error) {
-                ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+                ip->i_inode.i_ctime = CURRENT_TIME;
                gfs2_trans_add_bh(ip->i_gl, dibh, 1);
                gfs2_dinode_out(ip, dibh->b_data);
                brelse(dibh);
@@ -1352,7 +1352,7 @@ static int ea_dealloc_indirect(struct gfs2_inode *ip)
        for (x = 0; x < rlist.rl_rgrps; x++) {
                struct gfs2_rgrpd *rgd;
                rgd = rlist.rl_ghs[x].gh_gl->gl_object;
-                rg_blocks += rgd->rd_ri.ri_length;
+                rg_blocks += rgd->rd_length;
        }
        error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 1815429a2978..3f0974e1afef 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -422,11 +422,11 @@ void gfs2_holder_uninit(struct gfs2_holder *gh)
 static void gfs2_holder_wake(struct gfs2_holder *gh)
 {
        clear_bit(HIF_WAIT, &gh->gh_iflags);
-        smp_mb();
+        smp_mb__after_clear_bit();
        wake_up_bit(&gh->gh_iflags, HIF_WAIT);
 }
-static int holder_wait(void *word)
+static int just_schedule(void *word)
 {
        schedule();
        return 0;
@@ -435,7 +435,20 @@ static int holder_wait(void *word)
 static void wait_on_holder(struct gfs2_holder *gh)
 {
        might_sleep();
-        wait_on_bit(&gh->gh_iflags, HIF_WAIT, holder_wait, TASK_UNINTERRUPTIBLE);
+        wait_on_bit(&gh->gh_iflags, HIF_WAIT, just_schedule, TASK_UNINTERRUPTIBLE);
+}
+static void gfs2_demote_wake(struct gfs2_glock *gl)
+{
+        clear_bit(GLF_DEMOTE, &gl->gl_flags);
+        smp_mb__after_clear_bit();
+        wake_up_bit(&gl->gl_flags, GLF_DEMOTE);
+}
+static void wait_on_demote(struct gfs2_glock *gl)
+{
+        might_sleep();
+        wait_on_bit(&gl->gl_flags, GLF_DEMOTE, just_schedule, TASK_UNINTERRUPTIBLE);
 }
 /**
@@ -528,7 +541,7 @@ static int rq_demote(struct gfs2_glock *gl)
        if (gl->gl_state == gl->gl_demote_state ||
            gl->gl_state == LM_ST_UNLOCKED) {
-                clear_bit(GLF_DEMOTE, &gl->gl_flags);
+                gfs2_demote_wake(gl);
                return 0;
        }
        set_bit(GLF_LOCK, &gl->gl_flags);
@@ -666,12 +679,22 @@ static void gfs2_glmutex_unlock(struct gfs2_glock *gl)
 * practise: LM_ST_SHARED and LM_ST_UNLOCKED
 */
-static void handle_callback(struct gfs2_glock *gl, unsigned int state)
+static void handle_callback(struct gfs2_glock *gl, unsigned int state, int remote)
 {
        spin_lock(&gl->gl_spin);
        if (test_and_set_bit(GLF_DEMOTE, &gl->gl_flags) == 0) {
                gl->gl_demote_state = state;
                gl->gl_demote_time = jiffies;
+                if (remote && gl->gl_ops->go_type == LM_TYPE_IOPEN &&
+                    gl->gl_object) {
+                        struct inode *inode = igrab(gl->gl_object);
+                        spin_unlock(&gl->gl_spin);
+                        if (inode) {
+                                d_prune_aliases(inode);
+                                iput(inode);
+                        }
+                        return;
+                }
        } else if (gl->gl_demote_state != LM_ST_UNLOCKED) {
                gl->gl_demote_state = state;
        }
@@ -740,7 +763,7 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret)
                if (ret & LM_OUT_CANCELED)
                        op_done = 0;
                else
-                        clear_bit(GLF_DEMOTE, &gl->gl_flags);
+                        gfs2_demote_wake(gl);
        } else {
                spin_lock(&gl->gl_spin);
                list_del_init(&gh->gh_list);
@@ -848,7 +871,7 @@ static void drop_bh(struct gfs2_glock *gl, unsigned int ret)
        gfs2_assert_warn(sdp, !ret);
        state_change(gl, LM_ST_UNLOCKED);
-        clear_bit(GLF_DEMOTE, &gl->gl_flags);
+        gfs2_demote_wake(gl);
        if (glops->go_inval)
                glops->go_inval(gl, DIO_METADATA);
@@ -1174,7 +1197,7 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
        const struct gfs2_glock_operations *glops = gl->gl_ops;
        if (gh->gh_flags & GL_NOCACHE)
-                handle_callback(gl, LM_ST_UNLOCKED);
+                handle_callback(gl, LM_ST_UNLOCKED, 0);
        gfs2_glmutex_lock(gl);
@@ -1196,6 +1219,13 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
        spin_unlock(&gl->gl_spin);
 }
+void gfs2_glock_dq_wait(struct gfs2_holder *gh)
+{
+        struct gfs2_glock *gl = gh->gh_gl;
+        gfs2_glock_dq(gh);
+        wait_on_demote(gl);
+}
 /**
 * gfs2_glock_dq_uninit - dequeue a holder from a glock and initialize it
 * @gh: the holder structure
@@ -1297,10 +1327,6 @@ static int nq_m_sync(unsigned int num_gh, struct gfs2_holder *ghs,
 * @num_gh: the number of structures
 * @ghs: an array of struct gfs2_holder structures
 *
- * Figure out how big an impact this function has.  Either:
- * 1) Replace this code with code that calls gfs2_glock_prefetch()
- * 2) Forget async stuff and just call nq_m_sync()
- * 3) Leave it like it is
 *
 * Returns: 0 on success (all glocks acquired),
 *          errno on failure (no glocks acquired)
@@ -1308,62 +1334,28 @@ static int nq_m_sync(unsigned int num_gh, struct gfs2_holder *ghs,
 int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs)
 {
-        int *e;
+        struct gfs2_holder *tmp[4];
-        unsigned int x;
+        struct gfs2_holder **pph = tmp;
-        int borked = 0, serious = 0;
        int error = 0;
-        if (!num_gh)
+        switch(num_gh) {
+        case 0:
                return 0;
+        case 1:
-        if (num_gh == 1) {
                ghs->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC);
                return gfs2_glock_nq(ghs);
-        }
+        default:
+                if (num_gh <= 4)
-        e = kcalloc(num_gh, sizeof(struct gfs2_holder *), GFP_KERNEL);
-        if (!e)
-                return -ENOMEM;
-        for (x = 0; x < num_gh; x++) {
-                ghs[x].gh_flags |= LM_FLAG_TRY | GL_ASYNC;
-                error = gfs2_glock_nq(&ghs[x]);
-                if (error) {
-                        borked = 1;
-                        serious = error;
-                        num_gh = x;
                        break;
-                }
+                pph = kmalloc(num_gh * sizeof(struct gfs2_holder *), GFP_NOFS);
-        }
+                if (!pph)
+                        return -ENOMEM;
-        for (x = 0; x < num_gh; x++) {
-                error = e[x] = glock_wait_internal(&ghs[x]);
-                if (error) {
-                        borked = 1;
-                        if (error != GLR_TRYFAILED && error != GLR_CANCELED)
-                                serious = error;
-                }
        }
-        if (!borked) {
+        error = nq_m_sync(num_gh, ghs, pph);
-                kfree(e);
-                return 0;
-        }
-        for (x = 0; x < num_gh; x++)
-                if (!e[x])
-                        gfs2_glock_dq(&ghs[x]);
-        if (serious)
-                error = serious;
-        else {
-                for (x = 0; x < num_gh; x++)
-                        gfs2_holder_reinit(ghs[x].gh_state, ghs[x].gh_flags,
-                                          &ghs[x]);
-                error = nq_m_sync(num_gh, ghs, (struct gfs2_holder **)e);
-        }
-        kfree(e);
+        if (pph != tmp)
+                kfree(pph);
        return error;
 }
@@ -1456,7 +1448,7 @@ static void blocking_cb(struct gfs2_sbd *sdp, struct lm_lockname *name,
        if (!gl)
                return;
-        handle_callback(gl, state);
+        handle_callback(gl, state, 1);
        spin_lock(&gl->gl_spin);
        run_queue(gl);
@@ -1596,7 +1588,7 @@ void gfs2_reclaim_glock(struct gfs2_sbd *sdp)
        if (gfs2_glmutex_trylock(gl)) {
                if (list_empty(&gl->gl_holders) &&
                    gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl))
-                        handle_callback(gl, LM_ST_UNLOCKED);
+                        handle_callback(gl, LM_ST_UNLOCKED, 0);
                gfs2_glmutex_unlock(gl);
        }
@@ -1709,7 +1701,7 @@ static void clear_glock(struct gfs2_glock *gl)
        if (gfs2_glmutex_trylock(gl)) {
                if (list_empty(&gl->gl_holders) &&
                    gl->gl_state != LM_ST_UNLOCKED)
-                        handle_callback(gl, LM_ST_UNLOCKED);
+                        handle_callback(gl, LM_ST_UNLOCKED, 0);
                gfs2_glmutex_unlock(gl);
        }
 }
@@ -1823,7 +1815,8 @@ static int dump_inode(struct glock_iter *gi, struct gfs2_inode *ip)
        print_dbg(gi, "  Inode:\n");
        print_dbg(gi, "    num = %llu/%llu\n",
-                    ip->i_num.no_formal_ino, ip->i_num.no_addr);
+                  (unsigned long long)ip->i_no_formal_ino,
+                  (unsigned long long)ip->i_no_addr);
        print_dbg(gi, "    type = %u\n", IF2DT(ip->i_inode.i_mode));
        print_dbg(gi, "    i_flags =");
        for (x = 0; x < 32; x++)
@@ -1909,8 +1902,8 @@ static int dump_glock(struct glock_iter *gi, struct gfs2_glock *gl)
        }
        if (test_bit(GLF_DEMOTE, &gl->gl_flags)) {
                print_dbg(gi, "  Demotion req to state %u (%llu uS ago)\n",
-                          gl->gl_demote_state,
+                          gl->gl_demote_state, (unsigned long long)
-                          (u64)(jiffies - gl->gl_demote_time)*(1000000/HZ));
+                          (jiffies - gl->gl_demote_time)*(1000000/HZ));
        }
        if (gl->gl_ops == &gfs2_inode_glops && gl->gl_object) {
                if (!test_bit(GLF_LOCK, &gl->gl_flags) &&
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index b3e152db70c8..7721ca3fff9e 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -87,6 +87,7 @@ int gfs2_glock_nq(struct gfs2_holder *gh);
 int gfs2_glock_poll(struct gfs2_holder *gh);
 int gfs2_glock_wait(struct gfs2_holder *gh);
 void gfs2_glock_dq(struct gfs2_holder *gh);
+void gfs2_glock_dq_wait(struct gfs2_holder *gh);
 void gfs2_glock_dq_uninit(struct gfs2_holder *gh);
 int gfs2_glock_nq_num(struct gfs2_sbd *sdp,
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 7b82657a9910..777ca46010e8 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -156,9 +156,9 @@ static void inode_go_sync(struct gfs2_glock *gl)
                ip = NULL;
        if (test_bit(GLF_DIRTY, &gl->gl_flags)) {
-                gfs2_log_flush(gl->gl_sbd, gl);
                if (ip)
                        filemap_fdatawrite(ip->i_inode.i_mapping);
+                gfs2_log_flush(gl->gl_sbd, gl);
                gfs2_meta_sync(gl);
                if (ip) {
                        struct address_space *mapping = ip->i_inode.i_mapping;
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index d995441373ab..170ba93829c0 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -28,6 +28,14 @@ struct gfs2_sbd;
 typedef void (*gfs2_glop_bh_t) (struct gfs2_glock *gl, unsigned int ret);
+struct gfs2_log_header_host {
+        u64 lh_sequence;        /* Sequence number of this transaction */
+        u32 lh_flags;           /* GFS2_LOG_HEAD_... */
+        u32 lh_tail;            /* Block number of log tail */
+        u32 lh_blkno;
+        u32 lh_hash;
+};
 /*
 * Structure of operations that are associated with each
 * type of element in the log.
@@ -60,12 +68,23 @@ struct gfs2_bitmap {
        u32 bi_len;
 };
+struct gfs2_rgrp_host {
+        u32 rg_flags;
+        u32 rg_free;
+        u32 rg_dinodes;
+        u64 rg_igeneration;
+};
 struct gfs2_rgrpd {
        struct list_head rd_list;       /* Link with superblock */
        struct list_head rd_list_mru;
        struct list_head rd_recent;     /* Recently used rgrps */
        struct gfs2_glock *rd_gl;       /* Glock for this rgrp */
-        struct gfs2_rindex_host rd_ri;
+        u64 rd_addr;                    /* grp block disk address */
+        u64 rd_data0;                   /* first data location */
+        u32 rd_length;                  /* length of rgrp header in fs blocks */
+        u32 rd_data;                    /* num of data blocks in rgrp */
+        u32 rd_bitbytes;                /* number of bytes in data bitmaps */
        struct gfs2_rgrp_host rd_rg;
        u64 rd_rg_vn;
        struct gfs2_bitmap *rd_bits;
@@ -76,6 +95,8 @@ struct gfs2_rgrpd {
        u32 rd_last_alloc_data;
        u32 rd_last_alloc_meta;
        struct gfs2_sbd *rd_sbd;
+        unsigned long rd_flags;
+#define GFS2_RDF_CHECK        0x0001          /* Need to check for unlinked inodes */
 };
 enum gfs2_state_bits {
@@ -211,10 +232,24 @@ enum {
        GIF_SW_PAGED            = 3,
 };
+struct gfs2_dinode_host {
+        u64 di_size;            /* number of bytes in file */
+        u64 di_blocks;          /* number of blocks in file */
+        u64 di_goal_meta;       /* rgrp to alloc from next */
+        u64 di_goal_data;       /* data block goal */
+        u64 di_generation;      /* generation number for NFS */
+        u32 di_flags;           /* GFS2_DIF_... */
+        u16 di_height;          /* height of metadata */
+        /* These only apply to directories  */
+        u16 di_depth;           /* Number of bits in the table */
+        u32 di_entries;         /* The number of entries in the directory */
+        u64 di_eattr;           /* extended attribute block number */
+};
 struct gfs2_inode {
        struct inode i_inode;
-        struct gfs2_inum_host i_num;
+        u64 i_no_addr;
+        u64 i_no_formal_ino;
        unsigned long i_flags;          /* GIF_... */
        struct gfs2_dinode_host i_di; /* To be replaced by ref to block */
@@ -275,14 +310,6 @@ enum {
        QDF_LOCKED              = 2,
 };
-struct gfs2_quota_lvb {
-        __be32 qb_magic;
-        u32 __pad;
-        __be64 qb_limit;      /* Hard limit of # blocks to alloc */
-        __be64 qb_warn;       /* Warn user when alloc is above this # */
-        __be64 qb_value;       /* Current # blocks allocated */
-};
 struct gfs2_quota_data {
        struct list_head qd_list;
        unsigned int qd_count;
@@ -327,7 +354,9 @@ struct gfs2_trans {
        unsigned int tr_num_buf;
        unsigned int tr_num_buf_new;
+        unsigned int tr_num_databuf_new;
        unsigned int tr_num_buf_rm;
+        unsigned int tr_num_databuf_rm;
        struct list_head tr_list_buf;
        unsigned int tr_num_revoke;
@@ -354,6 +383,12 @@ struct gfs2_jdesc {
        unsigned int jd_blocks;
 };
+struct gfs2_statfs_change_host {
+        s64 sc_total;
+        s64 sc_free;
+        s64 sc_dinodes;
+};
 #define GFS2_GLOCKD_DEFAULT     1
 #define GFS2_GLOCKD_MAX         16
@@ -426,6 +461,28 @@ enum {
 #define GFS2_FSNAME_LEN         256
+struct gfs2_inum_host {
+        u64 no_formal_ino;
+        u64 no_addr;
+};
+struct gfs2_sb_host {
+        u32 sb_magic;
+        u32 sb_type;
+        u32 sb_format;
+        u32 sb_fs_format;
+        u32 sb_multihost_format;
+        u32 sb_bsize;
+        u32 sb_bsize_shift;
+        struct gfs2_inum_host sb_master_dir;
+        struct gfs2_inum_host sb_root_dir;
+        char sb_lockproto[GFS2_LOCKNAME_LEN];
+        char sb_locktable[GFS2_LOCKNAME_LEN];
+};
 struct gfs2_sbd {
        struct super_block *sd_vfs;
        struct super_block *sd_vfs_meta;
@@ -544,6 +601,7 @@ struct gfs2_sbd {
        unsigned int sd_log_blks_reserved;
        unsigned int sd_log_commited_buf;
+        unsigned int sd_log_commited_databuf;
        unsigned int sd_log_commited_revoke;
        unsigned int sd_log_num_gl;
@@ -552,7 +610,6 @@ struct gfs2_sbd {
        unsigned int sd_log_num_rg;
        unsigned int sd_log_num_databuf;
        unsigned int sd_log_num_jdata;
-        unsigned int sd_log_num_hdrs;
        struct list_head sd_log_le_gl;
        struct list_head sd_log_le_buf;
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index df0b8b3018b9..34f7bcdea1e9 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -38,12 +38,17 @@
 #include "trans.h"
 #include "util.h"
+struct gfs2_inum_range_host {
+        u64 ir_start;
+        u64 ir_length;
+};
 static int iget_test(struct inode *inode, void *opaque)
 {
        struct gfs2_inode *ip = GFS2_I(inode);
-        struct gfs2_inum_host *inum = opaque;
+        u64 *no_addr = opaque;
-        if (ip->i_num.no_addr == inum->no_addr &&
+        if (ip->i_no_addr == *no_addr &&
            inode->i_private != NULL)
                return 1;
@@ -53,37 +58,70 @@ static int iget_test(struct inode *inode, void *opaque)
 static int iget_set(struct inode *inode, void *opaque)
 {
        struct gfs2_inode *ip = GFS2_I(inode);
-        struct gfs2_inum_host *inum = opaque;
+        u64 *no_addr = opaque;
-        ip->i_num = *inum;
+        inode->i_ino = (unsigned long)*no_addr;
-        inode->i_ino = inum->no_addr;
+        ip->i_no_addr = *no_addr;
        return 0;
 }
-struct inode *gfs2_ilookup(struct super_block *sb, struct gfs2_inum_host *inum)
+/**
+ * gfs2_ilookup - find an inode by its no_addr value
+ * @sb: The super block
+ * @no_addr: The value iget_test() compares against i_no_addr
+ *
+ * The hash handed to ilookup5() is @no_addr cast to unsigned long.
+ *
+ * Returns: whatever ilookup5() returns
+ */
+struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr)
+{
+        return ilookup5(sb, (unsigned long)no_addr, iget_test, &no_addr);
+}
+static struct inode *gfs2_iget(struct super_block *sb, u64 no_addr)
 {
-        return ilookup5(sb, (unsigned long)inum->no_addr,
+        unsigned long hash = (unsigned long)no_addr;
-                        iget_test, inum);
+        return iget5_locked(sb, hash, iget_test, iget_set, &no_addr);
 }
-static struct inode *gfs2_iget(struct super_block *sb, struct gfs2_inum_host *inum)
+/**
+ * GFS2 lookup code fills in vfs inode contents based on info obtained
+ * from directory entry inside gfs2_inode_lookup(). This has caused issues
+ * with NFS code path since its get_dentry routine doesn't have the relevant
+ * directory entry when gfs2_inode_lookup() is invoked. Part of the code
+ * segment inside gfs2_inode_lookup code needs to get moved around.
+ *
+ * Clean up I_LOCK and I_NEW as well.
+ **/
+void gfs2_set_iop(struct inode *inode)
 {
-        return iget5_locked(sb, (unsigned long)inum->no_addr,
+        umode_t mode = inode->i_mode;
-                     iget_test, iget_set, inum);
+        if (S_ISREG(mode)) {
+                inode->i_op = &gfs2_file_iops;
+                inode->i_fop = &gfs2_file_fops;
+                inode->i_mapping->a_ops = &gfs2_file_aops;
+        } else if (S_ISDIR(mode)) {
+                inode->i_op = &gfs2_dir_iops;
+                inode->i_fop = &gfs2_dir_fops;
+        } else if (S_ISLNK(mode)) {
+                inode->i_op = &gfs2_symlink_iops;
+        } else {
+                inode->i_op = &gfs2_dev_iops;
+        }
+        /*
+         * Besides picking the operation tables from i_mode, this also
+         * calls unlock_new_inode(), so i_mode must be set before calling.
+         */
+        unlock_new_inode(inode);
 }
 /**
 * gfs2_inode_lookup - Lookup an inode
 * @sb: The super block
- * @inum: The inode number
+ * @no_addr: The inode number
 * @type: The type of the inode
 *
 * Returns: A VFS inode, or an error
 */
-struct inode *gfs2_inode_lookup(struct super_block *sb, struct gfs2_inum_host *inum, unsigned int type)
+struct inode *gfs2_inode_lookup(struct super_block *sb, 
+                                unsigned int type,
+                                u64 no_addr,
+                                u64 no_formal_ino)
 {
-        struct inode *inode = gfs2_iget(sb, inum);
+        struct inode *inode = gfs2_iget(sb, no_addr);
        struct gfs2_inode *ip = GFS2_I(inode);
        struct gfs2_glock *io_gl;
        int error;
@@ -93,29 +131,15 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, struct gfs2_inum_host *i
        if (inode->i_state & I_NEW) {
                struct gfs2_sbd *sdp = GFS2_SB(inode);
-                umode_t mode = DT2IF(type);
                inode->i_private = ip;
-                inode->i_mode = mode;
+                ip->i_no_formal_ino = no_formal_ino;
-                if (S_ISREG(mode)) {
-                        inode->i_op = &gfs2_file_iops;
-                        inode->i_fop = &gfs2_file_fops;
-                        inode->i_mapping->a_ops = &gfs2_file_aops;
-                } else if (S_ISDIR(mode)) {
-                        inode->i_op = &gfs2_dir_iops;
-                        inode->i_fop = &gfs2_dir_fops;
-                } else if (S_ISLNK(mode)) {
-                        inode->i_op = &gfs2_symlink_iops;
-                } else {
-                        inode->i_op = &gfs2_dev_iops;
-                }
-                error = gfs2_glock_get(sdp, inum->no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl);
+                error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl);
                if (unlikely(error))
                        goto fail;
                ip->i_gl->gl_object = ip;
-                error = gfs2_glock_get(sdp, inum->no_addr, &gfs2_iopen_glops, CREATE, &io_gl);
+                error = gfs2_glock_get(sdp, no_addr, &gfs2_iopen_glops, CREATE, &io_gl);
                if (unlikely(error))
                        goto fail_put;
@@ -123,12 +147,38 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, struct gfs2_inum_host *i
                error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh);
                if (unlikely(error))
                        goto fail_iopen;
+                ip->i_iopen_gh.gh_gl->gl_object = ip;
                gfs2_glock_put(io_gl);
-                unlock_new_inode(inode);
+                if ((type == DT_UNKNOWN) && (no_formal_ino == 0))
+                        goto gfs2_nfsbypass;
+                inode->i_mode = DT2IF(type);
+                /*
+                 * We must read the inode in order to work out its type in
+                 * this case. Note that this doesn't happen often as we normally
+                 * know the type beforehand. This code path only occurs during
+                 * unlinked inode recovery (where it is safe to do this glock,
+                 * which is not true in the general case).
+                 */
+                if (type == DT_UNKNOWN) {
+                        struct gfs2_holder gh;
+                        error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
+                        if (unlikely(error))
+                                goto fail_glock;
+                        /* Inode is now uptodate */
+                        gfs2_glock_dq_uninit(&gh);
+                }
+                gfs2_set_iop(inode);
        }
+gfs2_nfsbypass:
        return inode;
+fail_glock:
+        gfs2_glock_dq(&ip->i_iopen_gh);
 fail_iopen:
        gfs2_glock_put(io_gl);
 fail_put:
@@ -144,14 +194,12 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
        struct gfs2_dinode_host *di = &ip->i_di;
        const struct gfs2_dinode *str = buf;
-        if (ip->i_num.no_addr != be64_to_cpu(str->di_num.no_addr)) {
+        if (ip->i_no_addr != be64_to_cpu(str->di_num.no_addr)) {
                if (gfs2_consist_inode(ip))
                        gfs2_dinode_print(ip);
                return -EIO;
        }
-        if (ip->i_num.no_formal_ino != be64_to_cpu(str->di_num.no_formal_ino))
+        ip->i_no_formal_ino = be64_to_cpu(str->di_num.no_formal_ino);
-                return -ESTALE;
        ip->i_inode.i_mode = be32_to_cpu(str->di_mode);
        ip->i_inode.i_rdev = 0;
        switch (ip->i_inode.i_mode & S_IFMT) {
@@ -175,11 +223,11 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
        di->di_blocks = be64_to_cpu(str->di_blocks);
        gfs2_set_inode_blocks(&ip->i_inode);
        ip->i_inode.i_atime.tv_sec = be64_to_cpu(str->di_atime);
-        ip->i_inode.i_atime.tv_nsec = 0;
+        ip->i_inode.i_atime.tv_nsec = be32_to_cpu(str->di_atime_nsec);
        ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime);
-        ip->i_inode.i_mtime.tv_nsec = 0;
+        ip->i_inode.i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec);
        ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime);
-        ip->i_inode.i_ctime.tv_nsec = 0;
+        ip->i_inode.i_ctime.tv_nsec = be32_to_cpu(str->di_ctime_nsec);
        di->di_goal_meta = be64_to_cpu(str->di_goal_meta);
        di->di_goal_data = be64_to_cpu(str->di_goal_data);
@@ -247,7 +295,7 @@ int gfs2_dinode_dealloc(struct gfs2_inode *ip)
        if (error)
                goto out_qs;
-        rgd = gfs2_blk2rgrpd(sdp, ip->i_num.no_addr);
+        rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
        if (!rgd) {
                gfs2_consist_inode(ip);
                error = -EIO;
@@ -314,7 +362,7 @@ int gfs2_change_nlink(struct gfs2_inode *ip, int diff)
        else
                drop_nlink(&ip->i_inode);
-        ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+        ip->i_inode.i_ctime = CURRENT_TIME;
        gfs2_trans_add_bh(ip->i_gl, dibh, 1);
        gfs2_dinode_out(ip, dibh->b_data);
@@ -366,9 +414,7 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
        struct super_block *sb = dir->i_sb;
        struct gfs2_inode *dip = GFS2_I(dir);
        struct gfs2_holder d_gh;
-        struct gfs2_inum_host inum;
+        int error = 0;
-        unsigned int type;
-        int error;
        struct inode *inode = NULL;
        int unlock = 0;
@@ -395,12 +441,9 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
                        goto out;
        }
-        error = gfs2_dir_search(dir, name, &inum, &type);
+        inode = gfs2_dir_search(dir, name);
-        if (error)
+        if (IS_ERR(inode))
-                goto out;
+                error = PTR_ERR(inode);
-        inode = gfs2_inode_lookup(sb, &inum, type);
 out:
        if (unlock)
                gfs2_glock_dq_uninit(&d_gh);
@@ -409,6 +452,22 @@ out:
        return inode ? inode : ERR_PTR(error);
 }
+/* Read the big-endian struct gfs2_inum_range at @buf into host-order @ir */
+static void gfs2_inum_range_in(struct gfs2_inum_range_host *ir, const void *buf)
+{
+        const struct gfs2_inum_range *disk = buf;
+
+        ir->ir_start = be64_to_cpu(disk->ir_start);
+        ir->ir_length = be64_to_cpu(disk->ir_length);
+}
+/* Store host-order @ir at @buf as a big-endian struct gfs2_inum_range */
+static void gfs2_inum_range_out(const struct gfs2_inum_range_host *ir, void *buf)
+{
+        struct gfs2_inum_range *disk = buf;
+
+        disk->ir_start = cpu_to_be64(ir->ir_start);
+        disk->ir_length = cpu_to_be64(ir->ir_length);
+}
 static int pick_formal_ino_1(struct gfs2_sbd *sdp, u64 *formal_ino)
 {
        struct gfs2_inode *ip = GFS2_I(sdp->sd_ir_inode);
@@ -548,7 +607,7 @@ static int create_ok(struct gfs2_inode *dip, const struct qstr *name,
        if (!dip->i_inode.i_nlink)
                return -EPERM;
-        error = gfs2_dir_search(&dip->i_inode, name, NULL, NULL);
+        error = gfs2_dir_check(&dip->i_inode, name, NULL);
        switch (error) {
        case -ENOENT:
                error = 0;
@@ -588,8 +647,7 @@ static void munge_mode_uid_gid(struct gfs2_inode *dip, unsigned int *mode,
                *gid = current->fsgid;
 }
-static int alloc_dinode(struct gfs2_inode *dip, struct gfs2_inum_host *inum,
+static int alloc_dinode(struct gfs2_inode *dip, u64 *no_addr, u64 *generation)
-                        u64 *generation)
 {
        struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
        int error;
@@ -605,7 +663,7 @@ static int alloc_dinode(struct gfs2_inode *dip, struct gfs2_inum_host *inum,
        if (error)
                goto out_ipreserv;
-        inum->no_addr = gfs2_alloc_di(dip, generation);
+        *no_addr = gfs2_alloc_di(dip, generation);
        gfs2_trans_end(sdp);
@@ -635,6 +693,7 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
        struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
        struct gfs2_dinode *di;
        struct buffer_head *dibh;
+        struct timespec tv = CURRENT_TIME;
        dibh = gfs2_meta_new(gl, inum->no_addr);
        gfs2_trans_add_bh(gl, dibh, 1);
@@ -650,7 +709,7 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
        di->di_nlink = 0;
        di->di_size = 0;
        di->di_blocks = cpu_to_be64(1);
-        di->di_atime = di->di_mtime = di->di_ctime = cpu_to_be64(get_seconds());
+        di->di_atime = di->di_mtime = di->di_ctime = cpu_to_be64(tv.tv_sec);
        di->di_major = cpu_to_be32(MAJOR(dev));
        di->di_minor = cpu_to_be32(MINOR(dev));
        di->di_goal_meta = di->di_goal_data = cpu_to_be64(inum->no_addr);
@@ -680,6 +739,9 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
        di->di_entries = 0;
        memset(&di->__pad4, 0, sizeof(di->__pad4));
        di->di_eattr = 0;
+        di->di_atime_nsec = cpu_to_be32(tv.tv_nsec);
+        di->di_mtime_nsec = cpu_to_be32(tv.tv_nsec);
+        di->di_ctime_nsec = cpu_to_be32(tv.tv_nsec);
        memset(&di->di_reserved, 0, sizeof(di->di_reserved));
        brelse(dibh);
@@ -749,7 +811,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
                        goto fail_quota_locks;
                error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
-                                         al->al_rgd->rd_ri.ri_length +
+                                         al->al_rgd->rd_length +
                                         2 * RES_DINODE +
                                         RES_STATFS + RES_QUOTA, 0);
                if (error)
@@ -760,7 +822,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
                        goto fail_quota_locks;
        }
-        error = gfs2_dir_add(&dip->i_inode, name, &ip->i_num, IF2DT(ip->i_inode.i_mode));
+        error = gfs2_dir_add(&dip->i_inode, name, ip, IF2DT(ip->i_inode.i_mode));
        if (error)
                goto fail_end_trans;
@@ -840,11 +902,11 @@ static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip)
 struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
                           unsigned int mode, dev_t dev)
 {
-        struct inode *inode;
+        struct inode *inode = NULL;
        struct gfs2_inode *dip = ghs->gh_gl->gl_object;
        struct inode *dir = &dip->i_inode;
        struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
-        struct gfs2_inum_host inum;
+        struct gfs2_inum_host inum = { .no_addr = 0, .no_formal_ino = 0 };
        int error;
        u64 generation;
@@ -864,7 +926,7 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
        if (error)
                goto fail_gunlock;
-        error = alloc_dinode(dip, &inum, &generation);
+        error = alloc_dinode(dip, &inum.no_addr, &generation);
        if (error)
                goto fail_gunlock;
@@ -877,34 +939,36 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
        if (error)
                goto fail_gunlock2;
-        inode = gfs2_inode_lookup(dir->i_sb, &inum, IF2DT(mode));
+        inode = gfs2_inode_lookup(dir->i_sb, IF2DT(mode),
+                                        inum.no_addr,
+                                        inum.no_formal_ino);
        if (IS_ERR(inode))
                goto fail_gunlock2;
        error = gfs2_inode_refresh(GFS2_I(inode));
        if (error)
-                goto fail_iput;
+                goto fail_gunlock2;
        error = gfs2_acl_create(dip, GFS2_I(inode));
        if (error)
-                goto fail_iput;
+                goto fail_gunlock2;
        error = gfs2_security_init(dip, GFS2_I(inode));
        if (error)
-                goto fail_iput;
+                goto fail_gunlock2;
        error = link_dinode(dip, name, GFS2_I(inode));
        if (error)
-                goto fail_iput;
+                goto fail_gunlock2;
        if (!inode)
                return ERR_PTR(-ENOMEM);
        return inode;
-fail_iput:
-        iput(inode);
 fail_gunlock2:
        gfs2_glock_dq_uninit(ghs + 1);
+        if (inode)
+                iput(inode);
 fail_gunlock:
        gfs2_glock_dq(ghs);
 fail:
@@ -976,10 +1040,8 @@ int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name,
 */
 int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
-                   struct gfs2_inode *ip)
+                   const struct gfs2_inode *ip)
 {
-        struct gfs2_inum_host inum;
-        unsigned int type;
        int error;
        if (IS_IMMUTABLE(&ip->i_inode) || IS_APPEND(&ip->i_inode))
@@ -997,18 +1059,10 @@ int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
        if (error)
                return error;
-        error = gfs2_dir_search(&dip->i_inode, name, &inum, &type);
+        error = gfs2_dir_check(&dip->i_inode, name, ip);
        if (error)
                return error;
-        if (!gfs2_inum_equal(&inum, &ip->i_num))
-                return -ENOENT;
-        if (IF2DT(ip->i_inode.i_mode) != type) {
-                gfs2_consist_inode(dip);
-                return -EIO;
-        }
        return 0;
 }
@@ -1132,10 +1186,11 @@ int gfs2_glock_nq_atime(struct gfs2_holder *gh)
        struct gfs2_glock *gl = gh->gh_gl;
        struct gfs2_sbd *sdp = gl->gl_sbd;
        struct gfs2_inode *ip = gl->gl_object;
-        s64 curtime, quantum = gfs2_tune_get(sdp, gt_atime_quantum);
+        s64 quantum = gfs2_tune_get(sdp, gt_atime_quantum);
        unsigned int state;
        int flags;
        int error;
+        struct timespec tv = CURRENT_TIME;
        if (gfs2_assert_warn(sdp, gh->gh_flags & GL_ATIME) ||
            gfs2_assert_warn(sdp, !(gh->gh_flags & GL_ASYNC)) ||
@@ -1153,8 +1208,7 @@ int gfs2_glock_nq_atime(struct gfs2_holder *gh)
            (sdp->sd_vfs->s_flags & MS_RDONLY))
                return 0;
-        curtime = get_seconds();
+        if (tv.tv_sec - ip->i_inode.i_atime.tv_sec >= quantum) {
-        if (curtime - ip->i_inode.i_atime.tv_sec >= quantum) {
                gfs2_glock_dq(gh);
                gfs2_holder_reinit(LM_ST_EXCLUSIVE, gh->gh_flags & ~LM_FLAG_ANY,
                                   gh);
@@ -1165,8 +1219,8 @@ int gfs2_glock_nq_atime(struct gfs2_holder *gh)
                /* Verify that atime hasn't been updated while we were
                   trying to get exclusive lock. */
-                curtime = get_seconds();
+                tv = CURRENT_TIME;
-                if (curtime - ip->i_inode.i_atime.tv_sec >= quantum) {
+                if (tv.tv_sec - ip->i_inode.i_atime.tv_sec >= quantum) {
                        struct buffer_head *dibh;
                        struct gfs2_dinode *di;
@@ -1180,11 +1234,12 @@ int gfs2_glock_nq_atime(struct gfs2_holder *gh)
                        if (error)
                                goto fail_end_trans;
-                        ip->i_inode.i_atime.tv_sec = curtime;
+                        ip->i_inode.i_atime = tv;
                        gfs2_trans_add_bh(ip->i_gl, dibh, 1);
                        di = (struct gfs2_dinode *)dibh->b_data;
                        di->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec);
+                        di->di_atime_nsec = cpu_to_be32(ip->i_inode.i_atime.tv_nsec);
                        brelse(dibh);
                        gfs2_trans_end(sdp);
@@ -1252,3 +1307,66 @@ int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
        return error;
 }
+/**
+ * gfs2_dinode_out - serialise an in-core inode into a dinode buffer
+ * @ip: the in-core inode to read from
+ * @buf: destination, treated as a struct gfs2_dinode
+ *
+ * Every multi-byte field is stored through cpu_to_be16/32/64. Fields of
+ * the buffer that are not assigned here keep their previous contents.
+ */
+void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
+{
+        const struct inode *inode = &ip->i_inode;
+        const struct gfs2_dinode_host *di = &ip->i_di;
+        struct gfs2_dinode *str = buf;
+        u32 payload_format = 0;
+
+        /* Only a directory without the EXHASH flag carries a payload format */
+        if (S_ISDIR(inode->i_mode) && !(di->di_flags & GFS2_DIF_EXHASH))
+                payload_format = GFS2_FORMAT_DE;
+
+        /* Metadata header */
+        str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
+        str->di_header.mh_type = cpu_to_be32(GFS2_METATYPE_DI);
+        str->di_header.__pad0 = 0;
+        str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI);
+        str->di_header.__pad1 = 0;
+
+        /* Inode number */
+        str->di_num.no_addr = cpu_to_be64(ip->i_no_addr);
+        str->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino);
+
+        /* Attributes taken from the VFS inode */
+        str->di_mode = cpu_to_be32(inode->i_mode);
+        str->di_uid = cpu_to_be32(inode->i_uid);
+        str->di_gid = cpu_to_be32(inode->i_gid);
+        str->di_nlink = cpu_to_be32(inode->i_nlink);
+        str->di_size = cpu_to_be64(di->di_size);
+        str->di_blocks = cpu_to_be64(di->di_blocks);
+        str->di_atime = cpu_to_be64(inode->i_atime.tv_sec);
+        str->di_mtime = cpu_to_be64(inode->i_mtime.tv_sec);
+        str->di_ctime = cpu_to_be64(inode->i_ctime.tv_sec);
+
+        /* GFS2-private fields */
+        str->di_goal_meta = cpu_to_be64(di->di_goal_meta);
+        str->di_goal_data = cpu_to_be64(di->di_goal_data);
+        str->di_generation = cpu_to_be64(di->di_generation);
+        str->di_flags = cpu_to_be32(di->di_flags);
+        str->di_height = cpu_to_be16(di->di_height);
+        str->di_payload_format = cpu_to_be32(payload_format);
+        str->di_depth = cpu_to_be16(di->di_depth);
+        str->di_entries = cpu_to_be32(di->di_entries);
+        str->di_eattr = cpu_to_be64(di->di_eattr);
+
+        /* Nanosecond parts of the three timestamps */
+        str->di_atime_nsec = cpu_to_be32(inode->i_atime.tv_nsec);
+        str->di_mtime_nsec = cpu_to_be32(inode->i_mtime.tv_nsec);
+        str->di_ctime_nsec = cpu_to_be32(inode->i_ctime.tv_nsec);
+}
+/**
+ * gfs2_dinode_print - dump selected inode fields to the kernel log
+ * @ip: the in-core inode to print
+ *
+ * One KERN_INFO line is emitted per field. 64-bit values are cast to
+ * unsigned long long to match the %llu conversions.
+ */
+void gfs2_dinode_print(const struct gfs2_inode *ip)
+{
+        const struct gfs2_dinode_host *di = &ip->i_di;
+
+        /* Inode number */
+        printk(KERN_INFO "  no_formal_ino = %llu\n",
+               (unsigned long long)ip->i_no_formal_ino);
+        printk(KERN_INFO "  no_addr = %llu\n",
+               (unsigned long long)ip->i_no_addr);
+
+        /* 64-bit dinode fields */
+        printk(KERN_INFO "  di_size = %llu\n",
+               (unsigned long long)di->di_size);
+        printk(KERN_INFO "  di_blocks = %llu\n",
+               (unsigned long long)di->di_blocks);
+        printk(KERN_INFO "  di_goal_meta = %llu\n",
+               (unsigned long long)di->di_goal_meta);
+        printk(KERN_INFO "  di_goal_data = %llu\n",
+               (unsigned long long)di->di_goal_data);
+
+        /* Narrower dinode fields */
+        printk(KERN_INFO "  di_flags = 0x%.8X\n", di->di_flags);
+        printk(KERN_INFO "  di_height = %u\n", di->di_height);
+        printk(KERN_INFO "  di_depth = %u\n", di->di_depth);
+        printk(KERN_INFO "  di_entries = %u\n", di->di_entries);
+
+        printk(KERN_INFO "  di_eattr = %llu\n",
+               (unsigned long long)di->di_eattr);
+}
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index b57f448b15bc..4517ac82c01c 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -10,17 +10,17 @@
 #ifndef __INODE_DOT_H__
 #define __INODE_DOT_H__
-static inline int gfs2_is_stuffed(struct gfs2_inode *ip)
+static inline int gfs2_is_stuffed(const struct gfs2_inode *ip)
 {
        return !ip->i_di.di_height;
 }
-static inline int gfs2_is_jdata(struct gfs2_inode *ip)
+static inline int gfs2_is_jdata(const struct gfs2_inode *ip)
 {
        return ip->i_di.di_flags & GFS2_DIF_JDATA;
 }
-static inline int gfs2_is_dir(struct gfs2_inode *ip)
+static inline int gfs2_is_dir(const struct gfs2_inode *ip)
 {
        return S_ISDIR(ip->i_inode.i_mode);
 }
@@ -32,9 +32,25 @@ static inline void gfs2_set_inode_blocks(struct inode *inode)
                (GFS2_SB(inode)->sd_sb.sb_bsize_shift - GFS2_BASIC_BLOCK_SHIFT);
 }
+/* Returns 1 when both @no_addr and @no_formal_ino match @ip, otherwise 0 */
+static inline int gfs2_check_inum(const struct gfs2_inode *ip, u64 no_addr,
+                                  u64 no_formal_ino)
+{
+        if (ip->i_no_addr != no_addr)
+                return 0;
+        return ip->i_no_formal_ino == no_formal_ino;
+}
+/* Store @ip's inode number into @dent's de_inum in big-endian form */
+static inline void gfs2_inum_out(const struct gfs2_inode *ip,
+                                 struct gfs2_dirent *dent)
+{
+        const u64 formal_ino = ip->i_no_formal_ino;
+        const u64 addr = ip->i_no_addr;
+
+        dent->de_inum.no_formal_ino = cpu_to_be64(formal_ino);
+        dent->de_inum.no_addr = cpu_to_be64(addr);
+}
 void gfs2_inode_attr_in(struct gfs2_inode *ip);
-struct inode *gfs2_inode_lookup(struct super_block *sb, struct gfs2_inum_host *inum, unsigned type);
+void gfs2_set_iop(struct inode *inode);
-struct inode *gfs2_ilookup(struct super_block *sb, struct gfs2_inum_host *inum);
+struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, 
+                                u64 no_addr, u64 no_formal_ino);
+struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr);
 int gfs2_inode_refresh(struct gfs2_inode *ip);
@@ -47,12 +63,14 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
 int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name,
                struct gfs2_inode *ip);
 int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
-                   struct gfs2_inode *ip);
+                   const struct gfs2_inode *ip);
 int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to);
 int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len);
 int gfs2_glock_nq_atime(struct gfs2_holder *gh);
 int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr);
 struct inode *gfs2_lookup_simple(struct inode *dip, const char *name);
+void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf);
+void gfs2_dinode_print(const struct gfs2_inode *ip);
 #endif /* __INODE_DOT_H__ */
diff --git a/fs/gfs2/locking/dlm/lock.c b/fs/gfs2/locking/dlm/lock.c
index c305255bfe8a..542a797ac89a 100644
--- a/fs/gfs2/locking/dlm/lock.c
+++ b/fs/gfs2/locking/dlm/lock.c
@@ -174,7 +174,6 @@ static int gdlm_create_lp(struct gdlm_ls *ls, struct lm_lockname *name,
        lp->cur = DLM_LOCK_IV;
        lp->lvb = NULL;
        lp->hold_null = NULL;
-        init_completion(&lp->ast_wait);
        INIT_LIST_HEAD(&lp->clist);
        INIT_LIST_HEAD(&lp->blist);
        INIT_LIST_HEAD(&lp->delay_list);
@@ -399,6 +398,12 @@ static void gdlm_del_lvb(struct gdlm_lock *lp)
        lp->lksb.sb_lvbptr = NULL;
 }
+/*
+ * Action routine that hold_null_lock() hands to wait_on_bit(): it gives
+ * up the CPU with schedule() and returns 0. @word is not used.
+ */
+static int gdlm_ast_wait(void *word)
+{
+        schedule();
+        return 0;
+}
 /* This can do a synchronous dlm request (requiring a lock_dlm thread to get
   the completion) because gfs won't call hold_lvb() during a callback (from
   the context of a lock_dlm thread). */
@@ -424,10 +429,10 @@ static int hold_null_lock(struct gdlm_lock *lp)
        lpn->lkf = DLM_LKF_VALBLK | DLM_LKF_EXPEDITE;
        set_bit(LFL_NOBAST, &lpn->flags);
        set_bit(LFL_INLOCK, &lpn->flags);
+        set_bit(LFL_AST_WAIT, &lpn->flags);
-        init_completion(&lpn->ast_wait);
        gdlm_do_lock(lpn);
-        wait_for_completion(&lpn->ast_wait);
+        wait_on_bit(&lpn->flags, LFL_AST_WAIT, gdlm_ast_wait, TASK_UNINTERRUPTIBLE);
        error = lpn->lksb.sb_status;
        if (error) {
                printk(KERN_INFO "lock_dlm: hold_null_lock dlm error %d\n",
diff --git a/fs/gfs2/locking/dlm/lock_dlm.h b/fs/gfs2/locking/dlm/lock_dlm.h
index d074c6e6f9bf..24d70f73b651 100644
--- a/fs/gfs2/locking/dlm/lock_dlm.h
+++ b/fs/gfs2/locking/dlm/lock_dlm.h
@@ -101,6 +101,7 @@ enum {
        LFL_NOBAST              = 10,
        LFL_HEADQUE             = 11,
        LFL_UNLOCK_DELETE       = 12,
+        LFL_AST_WAIT            = 13,
 };
 struct gdlm_lock {
@@ -117,7 +118,6 @@ struct gdlm_lock {
        unsigned long           flags;          /* lock_dlm flags LFL_ */
        int                     bast_mode;      /* protected by async_lock */
-        struct completion       ast_wait;
        struct list_head        clist;          /* complete */
        struct list_head        blist;          /* blocking */
diff --git a/fs/gfs2/locking/dlm/mount.c b/fs/gfs2/locking/dlm/mount.c
index 1d8faa3da8af..41c5b04caaba 100644
--- a/fs/gfs2/locking/dlm/mount.c
+++ b/fs/gfs2/locking/dlm/mount.c
@@ -147,7 +147,7 @@ static int gdlm_mount(char *table_name, char *host_data,
        error = dlm_new_lockspace(ls->fsname, strlen(ls->fsname),
                                  &ls->dlm_lockspace,
-                                  nodir ? DLM_LSFL_NODIR : 0,
+                                  DLM_LSFL_FS | (nodir ? DLM_LSFL_NODIR : 0),
                                  GDLM_LVB_SIZE);
        if (error) {
                log_error("dlm_new_lockspace error %d", error);
diff --git a/fs/gfs2/locking/dlm/plock.c b/fs/gfs2/locking/dlm/plock.c
index f82495e18c2d..fba1f1d87e4f 100644
--- a/fs/gfs2/locking/dlm/plock.c
+++ b/fs/gfs2/locking/dlm/plock.c
@@ -242,7 +242,7 @@ int gdlm_plock_get(void *lockspace, struct lm_lockname *name,
        op->info.number         = name->ln_number;
        op->info.start          = fl->fl_start;
        op->info.end            = fl->fl_end;
+        op->info.owner          = (__u64)(long) fl->fl_owner;
        send_op(op);
        wait_event(recv_wq, (op->done != 0));
@@ -254,16 +254,20 @@ int gdlm_plock_get(void *lockspace, struct lm_lockname *name,
        }
        spin_unlock(&ops_lock);
+        /* info.rv from userspace is 1 for conflict, 0 for no-conflict,
+           -ENOENT if there are no locks on the file */
        rv = op->info.rv;
        fl->fl_type = F_UNLCK;
        if (rv == -ENOENT)
                rv = 0;
-        else if (rv == 0 && op->info.pid != fl->fl_pid) {
+        else if (rv > 0) {
                fl->fl_type = (op->info.ex) ? F_WRLCK : F_RDLCK;
                fl->fl_pid = op->info.pid;
                fl->fl_start = op->info.start;
                fl->fl_end = op->info.end;
+                rv = 0;
        }
        kfree(op);
diff --git a/fs/gfs2/locking/dlm/thread.c b/fs/gfs2/locking/dlm/thread.c
index 9cf1f168eaf8..1aca51e45092 100644
--- a/fs/gfs2/locking/dlm/thread.c
+++ b/fs/gfs2/locking/dlm/thread.c
@@ -44,6 +44,13 @@ static void process_blocking(struct gdlm_lock *lp, int bast_mode)
        ls->fscb(ls->sdp, cb, &lp->lockname);
 }
+/*
+ * Release a waiter sleeping on LFL_AST_WAIT in lp->flags; the counterpart
+ * of the wait_on_bit() call in hold_null_lock().
+ */
+static void wake_up_ast(struct gdlm_lock *lp)
+{
+        clear_bit(LFL_AST_WAIT, &lp->flags);
+        /* Barrier between clearing the bit and waking, as wake_up_bit() expects */
+        smp_mb__after_clear_bit();
+        wake_up_bit(&lp->flags, LFL_AST_WAIT);
+}
 static void process_complete(struct gdlm_lock *lp)
 {
        struct gdlm_ls *ls = lp->ls;
@@ -136,7 +143,7 @@ static void process_complete(struct gdlm_lock *lp)
         */
        if (test_and_clear_bit(LFL_SYNC_LVB, &lp->flags)) {
-                complete(&lp->ast_wait);
+                wake_up_ast(lp);
                return;
        }
@@ -214,7 +221,7 @@ out:
        if (test_bit(LFL_INLOCK, &lp->flags)) {
                clear_bit(LFL_NOBLOCK, &lp->flags);
                lp->cur = lp->req;
-                complete(&lp->ast_wait);
+                wake_up_ast(lp);
                return;
        }
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 291415ddfe51..f49a12e24086 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -83,6 +83,11 @@ static void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
                        gfs2_assert(sdp, bd->bd_ail == ai);
+                        if (!bh){
+                                list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list);
+                                continue;
+                        }
                        if (!buffer_busy(bh)) {
                                if (!buffer_uptodate(bh)) {
                                        gfs2_log_unlock(sdp);
@@ -125,6 +130,11 @@ static int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai, int fl
                                         bd_ail_st_list) {
                bh = bd->bd_bh;
+                if (!bh){
+                        list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list);
+                        continue;
+                }
                gfs2_assert(sdp, bd->bd_ail == ai);
                if (buffer_busy(bh)) {
@@ -262,8 +272,8 @@ static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail)
 * @sdp: The GFS2 superblock
 * @blks: The number of blocks to reserve
 *
- * Note that we never give out the last 6 blocks of the journal. Thats
+ * Note that we never give out the last few blocks of the journal. That's
- * due to the fact that there is are a small number of header blocks
+ * due to the fact that there is a small number of header blocks
 * associated with each log flush. The exact number can't be known until
 * flush time, so we ensure that we have just enough free blocks at all
 * times to avoid running out during a log flush.
@@ -274,6 +284,7 @@ static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail)
 int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks)
 {
        unsigned int try = 0;
+        unsigned reserved_blks = 6 * (4096 / sdp->sd_vfs->s_blocksize);
        if (gfs2_assert_warn(sdp, blks) ||
            gfs2_assert_warn(sdp, blks <= sdp->sd_jdesc->jd_blocks))
@@ -281,7 +292,7 @@ int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks)
        mutex_lock(&sdp->sd_log_reserve_mutex);
        gfs2_log_lock(sdp);
-        while(sdp->sd_log_blks_free <= (blks + 6)) {
+        while(sdp->sd_log_blks_free <= (blks + reserved_blks)) {
                gfs2_log_unlock(sdp);
                gfs2_ail1_empty(sdp, 0);
                gfs2_log_flush(sdp, NULL);
@@ -357,6 +368,58 @@ static inline unsigned int log_distance(struct gfs2_sbd *sdp, unsigned int newer
        return dist;
 }
+/**
+ * calc_reserved - Calculate the number of blocks to reserve when
+ *                 refunding a transaction's unused buffers.
+ * @sdp: The GFS2 superblock
+ *
+ * This is complex.  We need to reserve room for all our currently used
+ * metadata buffers (e.g. normal file I/O rewriting file time stamps) and 
+ * all our journaled data buffers for journaled files (e.g. files in the 
+ * meta_fs like rindex, or files for which chattr +j was done.)
+ * If we don't reserve enough space, gfs2_log_refund and gfs2_log_flush
+ * will count it as free space (sd_log_blks_free) and corruption will follow.
+ *
+ * We can have metadata bufs and jdata bufs in the same journal.  So each
+ * type gets its own log header, for which we need to reserve a block.
+ * In fact, each type has the potential for needing more than one header 
+ * in cases where we have more buffers than will fit on a journal page.
+ * Metadata journal entries take up half the space of journaled buffer entries.
+ * Thus, metadata entries have buf_limit (503) and journaled buffers have
+ * databuf_limit (251) before they cause a wrap around.
+ *
+ * Also, we need to reserve blocks for revoke journal entries and one for an
+ * overall header for the lot.
+ *
+ * Returns: the number of blocks reserved
+ */
+static unsigned int calc_reserved(struct gfs2_sbd *sdp)
+{
+        unsigned int reserved = 0;
+        unsigned int mbuf_limit, metabufhdrs_needed;
+        unsigned int dbuf_limit, databufhdrs_needed;
+        unsigned int revokes = 0;
+        mbuf_limit = buf_limit(sdp);
+        metabufhdrs_needed = (sdp->sd_log_commited_buf +
+                              (mbuf_limit - 1)) / mbuf_limit;
+        dbuf_limit = databuf_limit(sdp);
+        databufhdrs_needed = (sdp->sd_log_commited_databuf +
+                              (dbuf_limit - 1)) / dbuf_limit;
+        if (sdp->sd_log_commited_revoke)
+                revokes = gfs2_struct2blk(sdp, sdp->sd_log_commited_revoke,
+                                          sizeof(u64));
+        reserved = sdp->sd_log_commited_buf + metabufhdrs_needed +
+                sdp->sd_log_commited_databuf + databufhdrs_needed +
+                revokes;
+        /* One for the overall header */
+        if (reserved)
+                reserved++;
+        return reserved;
+}
 static unsigned int current_tail(struct gfs2_sbd *sdp)
 {
        struct gfs2_ail *ai;
@@ -447,14 +510,14 @@ struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp,
        return bh;
 }
-static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail, int pull)
+static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail)
 {
        unsigned int dist = log_distance(sdp, new_tail, sdp->sd_log_tail);
        ail2_empty(sdp, new_tail);
        gfs2_log_lock(sdp);
-        sdp->sd_log_blks_free += dist - (pull ? 1 : 0);
+        sdp->sd_log_blks_free += dist;
        gfs2_assert_withdraw(sdp, sdp->sd_log_blks_free <= sdp->sd_jdesc->jd_blocks);
        gfs2_log_unlock(sdp);
@@ -504,7 +567,7 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull)
        brelse(bh);
        if (sdp->sd_log_tail != tail)
-                log_pull_tail(sdp, tail, pull);
+                log_pull_tail(sdp, tail);
        else
                gfs2_assert_withdraw(sdp, !pull);
@@ -517,6 +580,7 @@ static void log_flush_commit(struct gfs2_sbd *sdp)
        struct list_head *head = &sdp->sd_log_flush_list;
        struct gfs2_log_buf *lb;
        struct buffer_head *bh;
+        int flushcount = 0;
        while (!list_empty(head)) {
                lb = list_entry(head->next, struct gfs2_log_buf, lb_list);
@@ -533,9 +597,20 @@ static void log_flush_commit(struct gfs2_sbd *sdp)
                } else
                        brelse(bh);
                kfree(lb);
+                flushcount++;
        }
-        log_write_header(sdp, 0, 0);
+        /* If nothing was journaled, the header is unplanned and unwanted. */
+        if (flushcount) {
+                log_write_header(sdp, 0, 0);
+        } else {
+                unsigned int tail;
+                tail = current_tail(sdp);
+                gfs2_ail1_empty(sdp, 0);
+                if (sdp->sd_log_tail != tail)
+                        log_pull_tail(sdp, tail);
+        }
 }
 /**
@@ -565,7 +640,10 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
        INIT_LIST_HEAD(&ai->ai_ail1_list);
        INIT_LIST_HEAD(&ai->ai_ail2_list);
-        gfs2_assert_withdraw(sdp, sdp->sd_log_num_buf == sdp->sd_log_commited_buf);
+        gfs2_assert_withdraw(sdp,
+                             sdp->sd_log_num_buf + sdp->sd_log_num_jdata ==
+                             sdp->sd_log_commited_buf +
+                             sdp->sd_log_commited_databuf);
        gfs2_assert_withdraw(sdp,
                        sdp->sd_log_num_revoke == sdp->sd_log_commited_revoke);
@@ -576,16 +654,19 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
        lops_before_commit(sdp);
        if (!list_empty(&sdp->sd_log_flush_list))
                log_flush_commit(sdp);
-        else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle)
+        else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle){
+                gfs2_log_lock(sdp);
+                sdp->sd_log_blks_free--; /* Adjust for unreserved buffer */
+                gfs2_log_unlock(sdp);
                log_write_header(sdp, 0, PULL);
+        }
        lops_after_commit(sdp, ai);
        gfs2_log_lock(sdp);
        sdp->sd_log_head = sdp->sd_log_flush_head;
-        sdp->sd_log_blks_free -= sdp->sd_log_num_hdrs;
        sdp->sd_log_blks_reserved = 0;
        sdp->sd_log_commited_buf = 0;
-        sdp->sd_log_num_hdrs = 0;
+        sdp->sd_log_commited_databuf = 0;
        sdp->sd_log_commited_revoke = 0;
        if (!list_empty(&ai->ai_ail1_list)) {
@@ -602,32 +683,26 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
 static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
 {
-        unsigned int reserved = 0;
+        unsigned int reserved;
        unsigned int old;
        gfs2_log_lock(sdp);
        sdp->sd_log_commited_buf += tr->tr_num_buf_new - tr->tr_num_buf_rm;
-        gfs2_assert_withdraw(sdp, ((int)sdp->sd_log_commited_buf) >= 0);
+        sdp->sd_log_commited_databuf += tr->tr_num_databuf_new -
+                tr->tr_num_databuf_rm;
+        gfs2_assert_withdraw(sdp, (((int)sdp->sd_log_commited_buf) >= 0) ||
+                             (((int)sdp->sd_log_commited_databuf) >= 0));
        sdp->sd_log_commited_revoke += tr->tr_num_revoke - tr->tr_num_revoke_rm;
        gfs2_assert_withdraw(sdp, ((int)sdp->sd_log_commited_revoke) >= 0);
+        reserved = calc_reserved(sdp);
-        if (sdp->sd_log_commited_buf)
-                reserved += sdp->sd_log_commited_buf;
-        if (sdp->sd_log_commited_revoke)
-                reserved += gfs2_struct2blk(sdp, sdp->sd_log_commited_revoke,
-                                            sizeof(u64));
-        if (reserved)
-                reserved++;
        old = sdp->sd_log_blks_free;
        sdp->sd_log_blks_free += tr->tr_reserved -
                                 (reserved - sdp->sd_log_blks_reserved);
        gfs2_assert_withdraw(sdp, sdp->sd_log_blks_free >= old);
-        gfs2_assert_withdraw(sdp,
+        gfs2_assert_withdraw(sdp, sdp->sd_log_blks_free <=
-                             sdp->sd_log_blks_free <= sdp->sd_jdesc->jd_blocks +
+                             sdp->sd_jdesc->jd_blocks);
-                             sdp->sd_log_num_hdrs);
        sdp->sd_log_blks_reserved = reserved;
@@ -673,13 +748,13 @@ void gfs2_log_shutdown(struct gfs2_sbd *sdp)
        gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
        gfs2_assert_withdraw(sdp, !sdp->sd_log_num_rg);
        gfs2_assert_withdraw(sdp, !sdp->sd_log_num_databuf);
-        gfs2_assert_withdraw(sdp, !sdp->sd_log_num_hdrs);
        gfs2_assert_withdraw(sdp, list_empty(&sdp->sd_ail1_list));
        sdp->sd_log_flush_head = sdp->sd_log_head;
        sdp->sd_log_flush_wrapped = 0;
-        log_write_header(sdp, GFS2_LOG_HEAD_UNMOUNT, 0);
+        log_write_header(sdp, GFS2_LOG_HEAD_UNMOUNT,
+                         (sdp->sd_log_tail == current_tail(sdp)) ? 0 : PULL);
        gfs2_assert_warn(sdp, sdp->sd_log_blks_free == sdp->sd_jdesc->jd_blocks);
        gfs2_assert_warn(sdp, sdp->sd_log_head == sdp->sd_log_tail);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index f82d84d05d23..aff70f0698fd 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -17,6 +17,7 @@
 #include "gfs2.h"
 #include "incore.h"
+#include "inode.h"
 #include "glock.h"
 #include "log.h"
 #include "lops.h"
@@ -117,15 +118,13 @@ static void buf_lo_before_commit(struct gfs2_sbd *sdp)
        struct gfs2_log_descriptor *ld;
        struct gfs2_bufdata *bd1 = NULL, *bd2;
        unsigned int total = sdp->sd_log_num_buf;
-        unsigned int offset = sizeof(struct gfs2_log_descriptor);
+        unsigned int offset = BUF_OFFSET;
        unsigned int limit;
        unsigned int num;
        unsigned n;
        __be64 *ptr;
-        offset += sizeof(__be64) - 1;
+        limit = buf_limit(sdp);
-        offset &= ~(sizeof(__be64) - 1);
-        limit = (sdp->sd_sb.sb_bsize - offset)/sizeof(__be64);
        /* for 4k blocks, limit = 503 */
        bd1 = bd2 = list_prepare_entry(bd1, &sdp->sd_log_le_buf, bd_le.le_list);
@@ -134,7 +133,6 @@ static void buf_lo_before_commit(struct gfs2_sbd *sdp)
                if (total > limit)
                        num = limit;
                bh = gfs2_log_get_buf(sdp);
-                sdp->sd_log_num_hdrs++;
                ld = (struct gfs2_log_descriptor *)bh->b_data;
                ptr = (__be64 *)(bh->b_data + offset);
                ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
@@ -469,25 +467,28 @@ static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
        struct gfs2_inode *ip = GFS2_I(mapping->host);
        gfs2_log_lock(sdp);
+        if (!list_empty(&bd->bd_list_tr)) {
+                gfs2_log_unlock(sdp);
+                return;
+        }
        tr->tr_touched = 1;
-        if (list_empty(&bd->bd_list_tr) &&
+        if (gfs2_is_jdata(ip)) {
-            (ip->i_di.di_flags & GFS2_DIF_JDATA)) {
                tr->tr_num_buf++;
                list_add(&bd->bd_list_tr, &tr->tr_list_buf);
-                gfs2_log_unlock(sdp);
-                gfs2_pin(sdp, bd->bd_bh);
-                tr->tr_num_buf_new++;
-        } else {
-                gfs2_log_unlock(sdp);
        }
+        gfs2_log_unlock(sdp);
+        if (!list_empty(&le->le_list))
+                return;
        gfs2_trans_add_gl(bd->bd_gl);
-        gfs2_log_lock(sdp);
+        if (gfs2_is_jdata(ip)) {
-        if (list_empty(&le->le_list)) {
+                sdp->sd_log_num_jdata++;
-                if (ip->i_di.di_flags & GFS2_DIF_JDATA)
+                gfs2_pin(sdp, bd->bd_bh);
-                        sdp->sd_log_num_jdata++;
+                tr->tr_num_databuf_new++;
-                sdp->sd_log_num_databuf++;
-                list_add(&le->le_list, &sdp->sd_log_le_databuf);
        }
+        sdp->sd_log_num_databuf++;
+        gfs2_log_lock(sdp);
+        list_add(&le->le_list, &sdp->sd_log_le_databuf);
        gfs2_log_unlock(sdp);
 }
@@ -520,7 +521,6 @@ static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
        LIST_HEAD(started);
        struct gfs2_bufdata *bd1 = NULL, *bd2, *bdt;
        struct buffer_head *bh = NULL,*bh1 = NULL;
-        unsigned int offset = sizeof(struct gfs2_log_descriptor);
        struct gfs2_log_descriptor *ld;
        unsigned int limit;
        unsigned int total_dbuf = sdp->sd_log_num_databuf;
@@ -528,9 +528,7 @@ static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
        unsigned int num, n;
        __be64 *ptr = NULL;
-        offset += 2*sizeof(__be64) - 1;
+        limit = databuf_limit(sdp);
-        offset &= ~(2*sizeof(__be64) - 1);
-        limit = (sdp->sd_sb.sb_bsize - offset)/sizeof(__be64);
        /*
         * Start writing ordered buffers, write journaled buffers
@@ -581,10 +579,10 @@ static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
                                gfs2_log_unlock(sdp);
                                if (!bh) {
                                        bh = gfs2_log_get_buf(sdp);
-                                        sdp->sd_log_num_hdrs++;
                                        ld = (struct gfs2_log_descriptor *)
                                             bh->b_data;
-                                        ptr = (__be64 *)(bh->b_data + offset);
+                                        ptr = (__be64 *)(bh->b_data +
+                                                         DATABUF_OFFSET);
                                        ld->ld_header.mh_magic =
                                                cpu_to_be32(GFS2_MAGIC);
                                        ld->ld_header.mh_type =
@@ -605,7 +603,7 @@ static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
                                if (unlikely(magic != 0))
                                        set_buffer_escaped(bh1);
                                gfs2_log_lock(sdp);
-                                if (n++ > num)
+                                if (++n >= num)
                                        break;
                        } else if (!bh1) {
                                total_dbuf--;
@@ -622,6 +620,7 @@ static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
                }
                gfs2_log_unlock(sdp);
                if (bh) {
+                        set_buffer_mapped(bh);
                        set_buffer_dirty(bh);
                        ll_rw_block(WRITE, 1, &bh);
                        bh = NULL;
diff --git a/fs/gfs2/lops.h b/fs/gfs2/lops.h
index 965bc65c7c64..41a00df75587 100644
--- a/fs/gfs2/lops.h
+++ b/fs/gfs2/lops.h
@@ -13,6 +13,13 @@
 #include <linux/list.h>
 #include "incore.h"
+#define BUF_OFFSET \
+        ((sizeof(struct gfs2_log_descriptor) + sizeof(__be64) - 1) & \
+         ~(sizeof(__be64) - 1))
+#define DATABUF_OFFSET \
+        ((sizeof(struct gfs2_log_descriptor) + (2 * sizeof(__be64) - 1)) & \
+         ~(2 * sizeof(__be64) - 1))
 extern const struct gfs2_log_operations gfs2_glock_lops;
 extern const struct gfs2_log_operations gfs2_buf_lops;
 extern const struct gfs2_log_operations gfs2_revoke_lops;
@@ -21,6 +28,22 @@ extern const struct gfs2_log_operations gfs2_databuf_lops;
 extern const struct gfs2_log_operations *gfs2_log_ops[];
+static inline unsigned int buf_limit(struct gfs2_sbd *sdp)
+{
+        unsigned int limit;
+        limit = (sdp->sd_sb.sb_bsize - BUF_OFFSET) / sizeof(__be64);
+        return limit;
+}
+static inline unsigned int databuf_limit(struct gfs2_sbd *sdp)
+{
+        unsigned int limit;
+        limit = (sdp->sd_sb.sb_bsize - DATABUF_OFFSET) / (2 * sizeof(__be64));
+        return limit;
+}
 static inline void lops_init_le(struct gfs2_log_element *le,
                                const struct gfs2_log_operations *lops)
 {
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index e62d4f620c58..8da343b34ae7 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -387,12 +387,18 @@ void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen)
                        if (test_clear_buffer_pinned(bh)) {
                                struct gfs2_trans *tr = current->journal_info;
+                                struct gfs2_inode *bh_ip =
+                                        GFS2_I(bh->b_page->mapping->host);
                                gfs2_log_lock(sdp);
                                list_del_init(&bd->bd_le.le_list);
                                gfs2_assert_warn(sdp, sdp->sd_log_num_buf);
                                sdp->sd_log_num_buf--;
                                gfs2_log_unlock(sdp);
-                                tr->tr_num_buf_rm++;
+                                if (bh_ip->i_inode.i_private != NULL)
+                                        tr->tr_num_databuf_rm++;
+                                else
+                                        tr->tr_num_buf_rm++;
                                brelse(bh);
                        }
                        if (bd) {
diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h
index e037425bc042..527bf19d9690 100644
--- a/fs/gfs2/meta_io.h
+++ b/fs/gfs2/meta_io.h
@@ -63,7 +63,7 @@ int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, u64 num,
 static inline int gfs2_meta_inode_buffer(struct gfs2_inode *ip,
                                         struct buffer_head **bhp)
 {
-        return gfs2_meta_indirect_buffer(ip, 0, ip->i_num.no_addr, 0, bhp);
+        return gfs2_meta_indirect_buffer(ip, 0, ip->i_no_addr, 0, bhp);
 }
 struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen);
diff --git a/fs/gfs2/mount.c b/fs/gfs2/mount.c
index 4864659555d4..6f006a804db3 100644
--- a/fs/gfs2/mount.c
+++ b/fs/gfs2/mount.c
@@ -82,20 +82,19 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, char *data_arg, int remount)
        char *options, *o, *v;
        int error = 0;
-        if (!remount) {
+        /*  If someone preloaded options, use those instead  */
-                /*  If someone preloaded options, use those instead  */
+        spin_lock(&gfs2_sys_margs_lock);
-                spin_lock(&gfs2_sys_margs_lock);
+        if (!remount && gfs2_sys_margs) {
-                if (gfs2_sys_margs) {
+                data = gfs2_sys_margs;
-                        data = gfs2_sys_margs;
+                gfs2_sys_margs = NULL;
-                        gfs2_sys_margs = NULL;
-                }
-                spin_unlock(&gfs2_sys_margs_lock);
-                /*  Set some defaults  */
-                args->ar_num_glockd = GFS2_GLOCKD_DEFAULT;
-                args->ar_quota = GFS2_QUOTA_DEFAULT;
-                args->ar_data = GFS2_DATA_DEFAULT;
        }
+        spin_unlock(&gfs2_sys_margs_lock);
+        /*  Set some defaults  */
+        memset(args, 0, sizeof(struct gfs2_args));
+        args->ar_num_glockd = GFS2_GLOCKD_DEFAULT;
+        args->ar_quota = GFS2_QUOTA_DEFAULT;
+        args->ar_data = GFS2_DATA_DEFAULT;
        /* Split the options into tokens with the "," character and
           process them */
diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c
deleted file mode 100644
index d9ecfd23a49e..000000000000
--- a/fs/gfs2/ondisk.c
+++ /dev/null
@@ -1,251 +0,0 @@
-/*
- * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
- * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
- *
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU General Public License version 2.
- */
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-#include <linux/completion.h>
-#include <linux/buffer_head.h>
-#include "gfs2.h"
-#include <linux/gfs2_ondisk.h>
-#include <linux/lm_interface.h>
-#include "incore.h"
-#define pv(struct, member, fmt) printk(KERN_INFO "  "#member" = "fmt"\n", \
-                                       struct->member);
-/*
- * gfs2_xxx_in - read in an xxx struct
- * first arg: the cpu-order structure
- * buf: the disk-order buffer
- *
- * gfs2_xxx_out - write out an xxx struct
- * first arg: the cpu-order structure
- * buf: the disk-order buffer
- *
- * gfs2_xxx_print - print out an xxx struct
- * first arg: the cpu-order structure
- */
-void gfs2_inum_in(struct gfs2_inum_host *no, const void *buf)
-{
-        const struct gfs2_inum *str = buf;
-        no->no_formal_ino = be64_to_cpu(str->no_formal_ino);
-        no->no_addr = be64_to_cpu(str->no_addr);
-}
-void gfs2_inum_out(const struct gfs2_inum_host *no, void *buf)
-{
-        struct gfs2_inum *str = buf;
-        str->no_formal_ino = cpu_to_be64(no->no_formal_ino);
-        str->no_addr = cpu_to_be64(no->no_addr);
-}
-static void gfs2_inum_print(const struct gfs2_inum_host *no)
-{
-        printk(KERN_INFO "  no_formal_ino = %llu\n", (unsigned long long)no->no_formal_ino);
-        printk(KERN_INFO "  no_addr = %llu\n", (unsigned long long)no->no_addr);
-}
-static void gfs2_meta_header_in(struct gfs2_meta_header_host *mh, const void *buf)
-{
-        const struct gfs2_meta_header *str = buf;
-        mh->mh_magic = be32_to_cpu(str->mh_magic);
-        mh->mh_type = be32_to_cpu(str->mh_type);
-        mh->mh_format = be32_to_cpu(str->mh_format);
-}
-void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf)
-{
-        const struct gfs2_sb *str = buf;
-        gfs2_meta_header_in(&sb->sb_header, buf);
-        sb->sb_fs_format = be32_to_cpu(str->sb_fs_format);
-        sb->sb_multihost_format = be32_to_cpu(str->sb_multihost_format);
-        sb->sb_bsize = be32_to_cpu(str->sb_bsize);
-        sb->sb_bsize_shift = be32_to_cpu(str->sb_bsize_shift);
-        gfs2_inum_in(&sb->sb_master_dir, (char *)&str->sb_master_dir);
-        gfs2_inum_in(&sb->sb_root_dir, (char *)&str->sb_root_dir);
-        memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN);
-        memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN);
-}
-void gfs2_rindex_in(struct gfs2_rindex_host *ri, const void *buf)
-{
-        const struct gfs2_rindex *str = buf;
-        ri->ri_addr = be64_to_cpu(str->ri_addr);
-        ri->ri_length = be32_to_cpu(str->ri_length);
-        ri->ri_data0 = be64_to_cpu(str->ri_data0);
-        ri->ri_data = be32_to_cpu(str->ri_data);
-        ri->ri_bitbytes = be32_to_cpu(str->ri_bitbytes);
-}
-void gfs2_rindex_print(const struct gfs2_rindex_host *ri)
-{
-        printk(KERN_INFO "  ri_addr = %llu\n", (unsigned long long)ri->ri_addr);
-        pv(ri, ri_length, "%u");
-        printk(KERN_INFO "  ri_data0 = %llu\n", (unsigned long long)ri->ri_data0);
-        pv(ri, ri_data, "%u");
-        pv(ri, ri_bitbytes, "%u");
-}
-void gfs2_rgrp_in(struct gfs2_rgrp_host *rg, const void *buf)
-{
-        const struct gfs2_rgrp *str = buf;
-        rg->rg_flags = be32_to_cpu(str->rg_flags);
-        rg->rg_free = be32_to_cpu(str->rg_free);
-        rg->rg_dinodes = be32_to_cpu(str->rg_dinodes);
-        rg->rg_igeneration = be64_to_cpu(str->rg_igeneration);
-}
-void gfs2_rgrp_out(const struct gfs2_rgrp_host *rg, void *buf)
-{
-        struct gfs2_rgrp *str = buf;
-        str->rg_flags = cpu_to_be32(rg->rg_flags);
-        str->rg_free = cpu_to_be32(rg->rg_free);
-        str->rg_dinodes = cpu_to_be32(rg->rg_dinodes);
-        str->__pad = cpu_to_be32(0);
-        str->rg_igeneration = cpu_to_be64(rg->rg_igeneration);
-        memset(&str->rg_reserved, 0, sizeof(str->rg_reserved));
-}
-void gfs2_quota_in(struct gfs2_quota_host *qu, const void *buf)
-{
-        const struct gfs2_quota *str = buf;
-        qu->qu_limit = be64_to_cpu(str->qu_limit);
-        qu->qu_warn = be64_to_cpu(str->qu_warn);
-        qu->qu_value = be64_to_cpu(str->qu_value);
-}
-void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
-{
-        const struct gfs2_dinode_host *di = &ip->i_di;
-        struct gfs2_dinode *str = buf;
-        str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
-        str->di_header.mh_type = cpu_to_be32(GFS2_METATYPE_DI);
-        str->di_header.__pad0 = 0;
-        str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI);
-        str->di_header.__pad1 = 0;
-        gfs2_inum_out(&ip->i_num, &str->di_num);
-        str->di_mode = cpu_to_be32(ip->i_inode.i_mode);
-        str->di_uid = cpu_to_be32(ip->i_inode.i_uid);
-        str->di_gid = cpu_to_be32(ip->i_inode.i_gid);
-        str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink);
-        str->di_size = cpu_to_be64(di->di_size);
-        str->di_blocks = cpu_to_be64(di->di_blocks);
-        str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec);
-        str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec);
-        str->di_ctime = cpu_to_be64(ip->i_inode.i_ctime.tv_sec);
-        str->di_goal_meta = cpu_to_be64(di->di_goal_meta);
-        str->di_goal_data = cpu_to_be64(di->di_goal_data);
-        str->di_generation = cpu_to_be64(di->di_generation);
-        str->di_flags = cpu_to_be32(di->di_flags);
-        str->di_height = cpu_to_be16(di->di_height);
-        str->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) &&
-                                             !(ip->i_di.di_flags & GFS2_DIF_EXHASH) ?
-                                             GFS2_FORMAT_DE : 0);
-        str->di_depth = cpu_to_be16(di->di_depth);
-        str->di_entries = cpu_to_be32(di->di_entries);
-        str->di_eattr = cpu_to_be64(di->di_eattr);
-}
-void gfs2_dinode_print(const struct gfs2_inode *ip)
-{
-        const struct gfs2_dinode_host *di = &ip->i_di;
-        gfs2_inum_print(&ip->i_num);
-        printk(KERN_INFO "  di_size = %llu\n", (unsigned long long)di->di_size);
-        printk(KERN_INFO "  di_blocks = %llu\n", (unsigned long long)di->di_blocks);
-        printk(KERN_INFO "  di_goal_meta = %llu\n", (unsigned long long)di->di_goal_meta);
-        printk(KERN_INFO "  di_goal_data = %llu\n", (unsigned long long)di->di_goal_data);
-        pv(di, di_flags, "0x%.8X");
-        pv(di, di_height, "%u");
-        pv(di, di_depth, "%u");
-        pv(di, di_entries, "%u");
-        printk(KERN_INFO "  di_eattr = %llu\n", (unsigned long long)di->di_eattr);
-}
-void gfs2_log_header_in(struct gfs2_log_header_host *lh, const void *buf)
-{
-        const struct gfs2_log_header *str = buf;
-        gfs2_meta_header_in(&lh->lh_header, buf);
-        lh->lh_sequence = be64_to_cpu(str->lh_sequence);
-        lh->lh_flags = be32_to_cpu(str->lh_flags);
-        lh->lh_tail = be32_to_cpu(str->lh_tail);
-        lh->lh_blkno = be32_to_cpu(str->lh_blkno);
-        lh->lh_hash = be32_to_cpu(str->lh_hash);
-}
-void gfs2_inum_range_in(struct gfs2_inum_range_host *ir, const void *buf)
-{
-        const struct gfs2_inum_range *str = buf;
-        ir->ir_start = be64_to_cpu(str->ir_start);
-        ir->ir_length = be64_to_cpu(str->ir_length);
-}
-void gfs2_inum_range_out(const struct gfs2_inum_range_host *ir, void *buf)
-{
-        struct gfs2_inum_range *str = buf;
-        str->ir_start = cpu_to_be64(ir->ir_start);
-        str->ir_length = cpu_to_be64(ir->ir_length);
-}
-void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, const void *buf)
-{
-        const struct gfs2_statfs_change *str = buf;
-        sc->sc_total = be64_to_cpu(str->sc_total);
-        sc->sc_free = be64_to_cpu(str->sc_free);
-        sc->sc_dinodes = be64_to_cpu(str->sc_dinodes);
-}
-void gfs2_statfs_change_out(const struct gfs2_statfs_change_host *sc, void *buf)
-{
-        struct gfs2_statfs_change *str = buf;
-        str->sc_total = cpu_to_be64(sc->sc_total);
-        str->sc_free = cpu_to_be64(sc->sc_free);
-        str->sc_dinodes = cpu_to_be64(sc->sc_dinodes);
-}
-void gfs2_quota_change_in(struct gfs2_quota_change_host *qc, const void *buf)
-{
-        const struct gfs2_quota_change *str = buf;
-        qc->qc_change = be64_to_cpu(str->qc_change);
-        qc->qc_flags = be32_to_cpu(str->qc_flags);
-        qc->qc_id = be32_to_cpu(str->qc_id);
-}
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index 30c15622174f..26c888890c24 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -1,6 +1,6 @@
 /*
 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
- * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
+ * Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
@@ -32,6 +32,7 @@
 #include "trans.h"
 #include "rgrp.h"
 #include "ops_file.h"
+#include "super.h"
 #include "util.h"
 #include "glops.h"
@@ -49,6 +50,8 @@ static void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
                end = start + bsize;
                if (end <= from || start >= to)
                        continue;
+                if (gfs2_is_jdata(ip))
+                        set_buffer_uptodate(bh);
                gfs2_trans_add_bh(ip->i_gl, bh, 0);
        }
 }
@@ -134,7 +137,9 @@ static int gfs2_writepage(struct page *page, struct writeback_control *wbc)
                return 0; /* don't care */
        }
-        if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip)) {
+        if ((sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip)) &&
+            PageChecked(page)) {
+                ClearPageChecked(page);
                error = gfs2_trans_begin(sdp, RES_DINODE + 1, 0);
                if (error)
                        goto out_ignore;
@@ -203,11 +208,7 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
         * so we need to supply one here. It doesn't happen often.
         */
        if (unlikely(page->index)) {
-                kaddr = kmap_atomic(page, KM_USER0);
+                zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0);
-                memset(kaddr, 0, PAGE_CACHE_SIZE);
-                kunmap_atomic(kaddr, KM_USER0);
-                flush_dcache_page(page);
-                SetPageUptodate(page);
                return 0;
        }
@@ -450,6 +451,31 @@ out_uninit:
 }
 /**
+ * adjust_fs_space - Adjusts the free space available due to gfs2_grow
+ * @inode: the rindex inode
+ */
+static void adjust_fs_space(struct inode *inode)
+{
+        struct gfs2_sbd *sdp = inode->i_sb->s_fs_info;
+        struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
+        struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
+        u64 fs_total, new_free;
+        /* Total up the file system space, according to the latest rindex. */
+        fs_total = gfs2_ri_total(sdp);
+        spin_lock(&sdp->sd_statfs_spin);
+        if (fs_total > (m_sc->sc_total + l_sc->sc_total))
+                new_free = fs_total - (m_sc->sc_total + l_sc->sc_total);
+        else
+                new_free = 0;
+        spin_unlock(&sdp->sd_statfs_spin);
+        fs_warn(sdp, "File system extended by %llu blocks.\n",
+                (unsigned long long)new_free);
+        gfs2_statfs_change(sdp, new_free, new_free, 0);
+}
+/**
 * gfs2_commit_write - Commit write to a file
 * @file: The file to write to
 * @page: The page containing the data
@@ -511,6 +537,9 @@ static int gfs2_commit_write(struct file *file, struct page *page,
                di->di_size = cpu_to_be64(inode->i_size);
        }
+        if (inode == sdp->sd_rindex)
+                adjust_fs_space(inode);
        brelse(dibh);
        gfs2_trans_end(sdp);
        if (al->al_requested) {
@@ -543,6 +572,23 @@ fail_nounlock:
 }
 /**
+ * gfs2_set_page_dirty - Page dirtying function
+ * @page: The page to dirty
+ *
+ * Returns: 1 if it dirtied the page, or 0 otherwise
+ */
+ 
+static int gfs2_set_page_dirty(struct page *page)
+{
+        struct gfs2_inode *ip = GFS2_I(page->mapping->host);
+        struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host);
+        if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip))
+                SetPageChecked(page);
+        return __set_page_dirty_buffers(page);
+}
+/**
 * gfs2_bmap - Block map function
 * @mapping: Address space info
 * @lblock: The block to map
@@ -578,6 +624,8 @@ static void discard_buffer(struct gfs2_sbd *sdp, struct buffer_head *bh)
        if (bd) {
                bd->bd_bh = NULL;
                bh->b_private = NULL;
+                if (!bd->bd_ail && list_empty(&bd->bd_le.le_list))
+                        kmem_cache_free(gfs2_bufdata_cachep, bd);
        }
        gfs2_log_unlock(sdp);
@@ -598,6 +646,8 @@ static void gfs2_invalidatepage(struct page *page, unsigned long offset)
        unsigned int curr_off = 0;
        BUG_ON(!PageLocked(page));
+        if (offset == 0)
+                ClearPageChecked(page);
        if (!page_has_buffers(page))
                return;
@@ -728,8 +778,8 @@ static unsigned limit = 0;
                        return;
                fs_warn(sdp, "ip = %llu %llu\n",
-                        (unsigned long long)ip->i_num.no_formal_ino,
+                        (unsigned long long)ip->i_no_formal_ino,
-                        (unsigned long long)ip->i_num.no_addr);
+                        (unsigned long long)ip->i_no_addr);
                for (x = 0; x < GFS2_MAX_META_HEIGHT; x++)
                        fs_warn(sdp, "ip->i_cache[%u] = %s\n",
@@ -810,6 +860,7 @@ const struct address_space_operations gfs2_file_aops = {
        .sync_page = block_sync_page,
        .prepare_write = gfs2_prepare_write,
        .commit_write = gfs2_commit_write,
+        .set_page_dirty = gfs2_set_page_dirty,
        .bmap = gfs2_bmap,
        .invalidatepage = gfs2_invalidatepage,
        .releasepage = gfs2_releasepage,
diff --git a/fs/gfs2/ops_address.h b/fs/gfs2/ops_address.h
index 35aaee4aa7e1..fa1b5b3d28b9 100644
--- a/fs/gfs2/ops_address.h
+++ b/fs/gfs2/ops_address.h
@@ -1,6 +1,6 @@
 /*
 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
- * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
+ * Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
diff --git a/fs/gfs2/ops_dentry.c b/fs/gfs2/ops_dentry.c
index a6fdc52f554a..793e334d098e 100644
--- a/fs/gfs2/ops_dentry.c
+++ b/fs/gfs2/ops_dentry.c
@@ -21,6 +21,7 @@
 #include "glock.h"
 #include "ops_dentry.h"
 #include "util.h"
+#include "inode.h"
 /**
 * gfs2_drevalidate - Check directory lookup consistency
@@ -40,14 +41,15 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
        struct gfs2_inode *dip = GFS2_I(parent->d_inode);
        struct inode *inode = dentry->d_inode;
        struct gfs2_holder d_gh;
-        struct gfs2_inode *ip;
+        struct gfs2_inode *ip = NULL;
-        struct gfs2_inum_host inum;
-        unsigned int type;
        int error;
        int had_lock=0;
-        if (inode && is_bad_inode(inode))
+        if (inode) {
-                goto invalid;
+                if (is_bad_inode(inode))
+                        goto invalid;
+                ip = GFS2_I(inode);
+        }
        if (sdp->sd_args.ar_localcaching)
                goto valid;
@@ -59,7 +61,7 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
                        goto fail;
        } 
-        error = gfs2_dir_search(parent->d_inode, &dentry->d_name, &inum, &type);
+        error = gfs2_dir_check(parent->d_inode, &dentry->d_name, ip);
        switch (error) {
        case 0:
                if (!inode)
@@ -73,16 +75,6 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
                goto fail_gunlock;
        }
-        ip = GFS2_I(inode);
-        if (!gfs2_inum_equal(&ip->i_num, &inum))
-                goto invalid_gunlock;
-        if (IF2DT(ip->i_inode.i_mode) != type) {
-                gfs2_consist_inode(dip);
-                goto fail_gunlock;
-        }
 valid_gunlock:
        if (!had_lock)
                gfs2_glock_dq_uninit(&d_gh);
diff --git a/fs/gfs2/ops_export.c b/fs/gfs2/ops_export.c
index aad918337a46..99ea5659bc2c 100644
--- a/fs/gfs2/ops_export.c
+++ b/fs/gfs2/ops_export.c
@@ -22,10 +22,14 @@
 #include "glops.h"
 #include "inode.h"
 #include "ops_dentry.h"
-#include "ops_export.h"
+#include "ops_fstype.h"
 #include "rgrp.h"
 #include "util.h"
+#define GFS2_SMALL_FH_SIZE 4
+#define GFS2_LARGE_FH_SIZE 8
+#define GFS2_OLD_FH_SIZE 10
 static struct dentry *gfs2_decode_fh(struct super_block *sb,
                                     __u32 *p,
                                     int fh_len,
@@ -35,31 +39,28 @@ static struct dentry *gfs2_decode_fh(struct super_block *sb,
                                     void *context)
 {
        __be32 *fh = (__force __be32 *)p;
-        struct gfs2_fh_obj fh_obj;
+        struct gfs2_inum_host inum, parent;
-        struct gfs2_inum_host *this, parent;
-        this            = &fh_obj.this;
-        fh_obj.imode    = DT_UNKNOWN;
        memset(&parent, 0, sizeof(struct gfs2_inum));
        switch (fh_len) {
        case GFS2_LARGE_FH_SIZE:
+        case GFS2_OLD_FH_SIZE:
                parent.no_formal_ino = ((u64)be32_to_cpu(fh[4])) << 32;
                parent.no_formal_ino |= be32_to_cpu(fh[5]);
                parent.no_addr = ((u64)be32_to_cpu(fh[6])) << 32;
                parent.no_addr |= be32_to_cpu(fh[7]);
-                fh_obj.imode = be32_to_cpu(fh[8]);
        case GFS2_SMALL_FH_SIZE:
-                this->no_formal_ino = ((u64)be32_to_cpu(fh[0])) << 32;
+                inum.no_formal_ino = ((u64)be32_to_cpu(fh[0])) << 32;
-                this->no_formal_ino |= be32_to_cpu(fh[1]);
+                inum.no_formal_ino |= be32_to_cpu(fh[1]);
-                this->no_addr = ((u64)be32_to_cpu(fh[2])) << 32;
+                inum.no_addr = ((u64)be32_to_cpu(fh[2])) << 32;
-                this->no_addr |= be32_to_cpu(fh[3]);
+                inum.no_addr |= be32_to_cpu(fh[3]);
                break;
        default:
                return NULL;
        }
-        return gfs2_export_ops.find_exported_dentry(sb, &fh_obj, &parent,
+        return gfs2_export_ops.find_exported_dentry(sb, &inum, &parent,
                                                    acceptable, context);
 }
@@ -75,10 +76,10 @@ static int gfs2_encode_fh(struct dentry *dentry, __u32 *p, int *len,
            (connectable && *len < GFS2_LARGE_FH_SIZE))
                return 255;
-        fh[0] = cpu_to_be32(ip->i_num.no_formal_ino >> 32);
+        fh[0] = cpu_to_be32(ip->i_no_formal_ino >> 32);
-        fh[1] = cpu_to_be32(ip->i_num.no_formal_ino & 0xFFFFFFFF);
+        fh[1] = cpu_to_be32(ip->i_no_formal_ino & 0xFFFFFFFF);
-        fh[2] = cpu_to_be32(ip->i_num.no_addr >> 32);
+        fh[2] = cpu_to_be32(ip->i_no_addr >> 32);
-        fh[3] = cpu_to_be32(ip->i_num.no_addr & 0xFFFFFFFF);
+        fh[3] = cpu_to_be32(ip->i_no_addr & 0xFFFFFFFF);
        *len = GFS2_SMALL_FH_SIZE;
        if (!connectable || inode == sb->s_root->d_inode)
@@ -90,13 +91,10 @@ static int gfs2_encode_fh(struct dentry *dentry, __u32 *p, int *len,
        igrab(inode);
        spin_unlock(&dentry->d_lock);
-        fh[4] = cpu_to_be32(ip->i_num.no_formal_ino >> 32);
+        fh[4] = cpu_to_be32(ip->i_no_formal_ino >> 32);
-        fh[5] = cpu_to_be32(ip->i_num.no_formal_ino & 0xFFFFFFFF);
+        fh[5] = cpu_to_be32(ip->i_no_formal_ino & 0xFFFFFFFF);
-        fh[6] = cpu_to_be32(ip->i_num.no_addr >> 32);
+        fh[6] = cpu_to_be32(ip->i_no_addr >> 32);
-        fh[7] = cpu_to_be32(ip->i_num.no_addr & 0xFFFFFFFF);
+        fh[7] = cpu_to_be32(ip->i_no_addr & 0xFFFFFFFF);
-        fh[8]  = cpu_to_be32(inode->i_mode);
-        fh[9]  = 0;     /* pad to double word */
        *len = GFS2_LARGE_FH_SIZE;
        iput(inode);
@@ -144,7 +142,8 @@ static int gfs2_get_name(struct dentry *parent, char *name,
        ip = GFS2_I(inode);
        *name = 0;
-        gnfd.inum = ip->i_num;
+        gnfd.inum.no_addr = ip->i_no_addr;
+        gnfd.inum.no_formal_ino = ip->i_no_formal_ino;
        gnfd.name = name;
        error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &gh);
@@ -192,8 +191,7 @@ static struct dentry *gfs2_get_parent(struct dentry *child)
 static struct dentry *gfs2_get_dentry(struct super_block *sb, void *inum_obj)
 {
        struct gfs2_sbd *sdp = sb->s_fs_info;
-        struct gfs2_fh_obj *fh_obj = (struct gfs2_fh_obj *)inum_obj;
+        struct gfs2_inum_host *inum = inum_obj;
-        struct gfs2_inum_host *inum = &fh_obj->this;
        struct gfs2_holder i_gh, ri_gh, rgd_gh;
        struct gfs2_rgrpd *rgd;
        struct inode *inode;
@@ -202,9 +200,9 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb, void *inum_obj)
        /* System files? */
-        inode = gfs2_ilookup(sb, inum);
+        inode = gfs2_ilookup(sb, inum->no_addr);
        if (inode) {
-                if (GFS2_I(inode)->i_num.no_formal_ino != inum->no_formal_ino) {
+                if (GFS2_I(inode)->i_no_formal_ino != inum->no_formal_ino) {
                        iput(inode);
                        return ERR_PTR(-ESTALE);
                }
@@ -236,7 +234,9 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb, void *inum_obj)
        gfs2_glock_dq_uninit(&rgd_gh);
        gfs2_glock_dq_uninit(&ri_gh);
-        inode = gfs2_inode_lookup(sb, inum, fh_obj->imode);
+        inode = gfs2_inode_lookup(sb, DT_UNKNOWN,
+                                        inum->no_addr,
+                                        0);
        if (!inode)
                goto fail;
        if (IS_ERR(inode)) {
@@ -250,6 +250,15 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb, void *inum_obj)
                goto fail;
        }
+        /* Pick up the work we bypass in gfs2_inode_lookup */
+        if (inode->i_state & I_NEW) 
+                gfs2_set_iop(inode);
+        if (GFS2_I(inode)->i_no_formal_ino != inum->no_formal_ino) {
+                iput(inode);
+                goto fail;
+        }
        error = -EIO;
        if (GFS2_I(inode)->i_di.di_flags & GFS2_DIF_SYSTEM) {
                iput(inode);
diff --git a/fs/gfs2/ops_export.h b/fs/gfs2/ops_export.h
deleted file mode 100644
index f925a955b3b8..000000000000
--- a/fs/gfs2/ops_export.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
- * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
- *
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU General Public License version 2.
- */
-#ifndef __OPS_EXPORT_DOT_H__
-#define __OPS_EXPORT_DOT_H__
-#define GFS2_SMALL_FH_SIZE 4
-#define GFS2_LARGE_FH_SIZE 10
-extern struct export_operations gfs2_export_ops;
-struct gfs2_fh_obj {
-        struct gfs2_inum_host this;
-        __u32            imode;
-};
-#endif /* __OPS_EXPORT_DOT_H__ */
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index 064df8804582..196d83266e34 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -502,7 +502,7 @@ static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl)
        struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
        struct gfs2_sbd *sdp = GFS2_SB(file->f_mapping->host);
        struct lm_lockname name =
-                { .ln_number = ip->i_num.no_addr,
+                { .ln_number = ip->i_no_addr,
                  .ln_type = LM_TYPE_PLOCK };
        if (!(fl->fl_flags & FL_POSIX))
@@ -557,7 +557,7 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl)
                gfs2_glock_dq_uninit(fl_gh);
        } else {
                error = gfs2_glock_get(GFS2_SB(&ip->i_inode),
-                                      ip->i_num.no_addr, &gfs2_flock_glops,
+                                      ip->i_no_addr, &gfs2_flock_glops,
                                      CREATE, &gl);
                if (error)
                        goto out;
@@ -635,7 +635,6 @@ const struct file_operations gfs2_file_fops = {
        .release        = gfs2_close,
        .fsync          = gfs2_fsync,
        .lock           = gfs2_lock,
-        .sendfile       = generic_file_sendfile,
        .flock          = gfs2_flock,
        .splice_read    = generic_file_splice_read,
        .splice_write   = generic_file_splice_write,
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 2c5f8e7def0d..cf5aa5050548 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -27,7 +27,6 @@
 #include "inode.h"
 #include "lm.h"
 #include "mount.h"
-#include "ops_export.h"
 #include "ops_fstype.h"
 #include "ops_super.h"
 #include "recovery.h"
@@ -105,6 +104,7 @@ static void init_vfs(struct super_block *sb, unsigned noatime)
        sb->s_magic = GFS2_MAGIC;
        sb->s_op = &gfs2_super_ops;
        sb->s_export_op = &gfs2_export_ops;
+        sb->s_time_gran = 1;
        sb->s_maxbytes = MAX_LFS_FILESIZE;
        if (sb->s_flags & (MS_NOATIME | MS_NODIRATIME))
@@ -116,7 +116,6 @@ static void init_vfs(struct super_block *sb, unsigned noatime)
 static int init_names(struct gfs2_sbd *sdp, int silent)
 {
-        struct page *page;
        char *proto, *table;
        int error = 0;
@@ -126,14 +125,9 @@ static int init_names(struct gfs2_sbd *sdp, int silent)
        /*  Try to autodetect  */
        if (!proto[0] || !table[0]) {
-                struct gfs2_sb *sb;
+                error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift);
-                page = gfs2_read_super(sdp->sd_vfs, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift);
+                if (error)
-                if (!page)
+                        return error;
-                        return -ENOBUFS;
-                sb = kmap(page);
-                gfs2_sb_in(&sdp->sd_sb, sb);
-                kunmap(page);
-                __free_page(page);
                error = gfs2_check_sb(sdp, &sdp->sd_sb, silent);
                if (error)
@@ -151,6 +145,9 @@ static int init_names(struct gfs2_sbd *sdp, int silent)
        snprintf(sdp->sd_proto_name, GFS2_FSNAME_LEN, "%s", proto);
        snprintf(sdp->sd_table_name, GFS2_FSNAME_LEN, "%s", table);
+        while ((table = strchr(sdp->sd_table_name, '/')))
+                *table = '_';
 out:
        return error;
 }
@@ -236,17 +233,17 @@ fail:
        return error;
 }
-static struct inode *gfs2_lookup_root(struct super_block *sb,
+static inline struct inode *gfs2_lookup_root(struct super_block *sb,
-                                      struct gfs2_inum_host *inum)
+                                             u64 no_addr)
 {
-        return gfs2_inode_lookup(sb, inum, DT_DIR);
+        return gfs2_inode_lookup(sb, DT_DIR, no_addr, 0);
 }
 static int init_sb(struct gfs2_sbd *sdp, int silent, int undo)
 {
        struct super_block *sb = sdp->sd_vfs;
        struct gfs2_holder sb_gh;
-        struct gfs2_inum_host *inum;
+        u64 no_addr;
        struct inode *inode;
        int error = 0;
@@ -289,10 +286,10 @@ static int init_sb(struct gfs2_sbd *sdp, int silent, int undo)
        sb_set_blocksize(sb, sdp->sd_sb.sb_bsize);
        /* Get the root inode */
-        inum = &sdp->sd_sb.sb_root_dir;
+        no_addr = sdp->sd_sb.sb_root_dir.no_addr;
        if (sb->s_type == &gfs2meta_fs_type)
-                inum = &sdp->sd_sb.sb_master_dir;
+                no_addr = sdp->sd_sb.sb_master_dir.no_addr;
-        inode = gfs2_lookup_root(sb, inum);
+        inode = gfs2_lookup_root(sb, no_addr);
        if (IS_ERR(inode)) {
                error = PTR_ERR(inode);
                fs_err(sdp, "can't read in root inode: %d\n", error);
@@ -449,7 +446,7 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo)
        if (undo)
                goto fail_qinode;
-        inode = gfs2_lookup_root(sdp->sd_vfs, &sdp->sd_sb.sb_master_dir);
+        inode = gfs2_lookup_root(sdp->sd_vfs, sdp->sd_sb.sb_master_dir.no_addr);
        if (IS_ERR(inode)) {
                error = PTR_ERR(inode);
                fs_err(sdp, "can't read in master directory: %d\n", error);
diff --git a/fs/gfs2/ops_fstype.h b/fs/gfs2/ops_fstype.h
index 7cc2c296271b..407029b3b2b3 100644
--- a/fs/gfs2/ops_fstype.h
+++ b/fs/gfs2/ops_fstype.h
@@ -14,5 +14,6 @@
 extern struct file_system_type gfs2_fs_type;
 extern struct file_system_type gfs2meta_fs_type;
+extern struct export_operations gfs2_export_ops;
 #endif /* __OPS_FSTYPE_DOT_H__ */
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index d85f6e05cb95..911c115b5c6c 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -157,7 +157,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
        if (error)
                goto out_gunlock;
-        error = gfs2_dir_search(dir, &dentry->d_name, NULL, NULL);
+        error = gfs2_dir_check(dir, &dentry->d_name, NULL);
        switch (error) {
        case -ENOENT:
                break;
@@ -206,7 +206,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
                        goto out_gunlock_q;
                error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
-                                         al->al_rgd->rd_ri.ri_length +
+                                         al->al_rgd->rd_length +
                                         2 * RES_DINODE + RES_STATFS +
                                         RES_QUOTA, 0);
                if (error)
@@ -217,8 +217,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
                        goto out_ipres;
        }
-        error = gfs2_dir_add(dir, &dentry->d_name, &ip->i_num,
+        error = gfs2_dir_add(dir, &dentry->d_name, ip, IF2DT(inode->i_mode));
-                             IF2DT(inode->i_mode));
        if (error)
                goto out_end_trans;
@@ -275,7 +274,7 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
        gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
        gfs2_holder_init(ip->i_gl,  LM_ST_EXCLUSIVE, 0, ghs + 1);
-        rgd = gfs2_blk2rgrpd(sdp, ip->i_num.no_addr);
+        rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
        gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
@@ -420,7 +419,7 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode)
                dent = (struct gfs2_dirent *)((char*)dent + GFS2_DIRENT_SIZE(1));
                gfs2_qstr2dirent(&str, dibh->b_size - GFS2_DIRENT_SIZE(1) - sizeof(struct gfs2_dinode), dent);
-                gfs2_inum_out(&dip->i_num, &dent->de_inum);
+                gfs2_inum_out(dip, dent);
                dent->de_type = cpu_to_be16(DT_DIR);
                gfs2_dinode_out(ip, di);
@@ -472,7 +471,7 @@ static int gfs2_rmdir(struct inode *dir, struct dentry *dentry)
        gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
        gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
-        rgd = gfs2_blk2rgrpd(sdp, ip->i_num.no_addr);
+        rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
        gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
        error = gfs2_glock_nq_m(3, ghs);
@@ -614,7 +613,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
                 * this is the case of the target file already existing
                 * so we unlink before doing the rename
                 */
-                nrgd = gfs2_blk2rgrpd(sdp, nip->i_num.no_addr);
+                nrgd = gfs2_blk2rgrpd(sdp, nip->i_no_addr);
                if (nrgd)
                        gfs2_holder_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++);
        }
@@ -653,7 +652,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
                if (error)
                        goto out_gunlock;
-                error = gfs2_dir_search(ndir, &ndentry->d_name, NULL, NULL);
+                error = gfs2_dir_check(ndir, &ndentry->d_name, NULL);
                switch (error) {
                case -ENOENT:
                        error = 0;
@@ -712,7 +711,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
                        goto out_gunlock_q;
                error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
-                                         al->al_rgd->rd_ri.ri_length +
+                                         al->al_rgd->rd_length +
                                         4 * RES_DINODE + 4 * RES_LEAF +
                                         RES_STATFS + RES_QUOTA + 4, 0);
                if (error)
@@ -750,7 +749,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
                if (error)
                        goto out_end_trans;
-                error = gfs2_dir_mvino(ip, &name, &ndip->i_num, DT_DIR);
+                error = gfs2_dir_mvino(ip, &name, ndip, DT_DIR);
                if (error)
                        goto out_end_trans;
        } else {
@@ -758,7 +757,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
                error = gfs2_meta_inode_buffer(ip, &dibh);
                if (error)
                        goto out_end_trans;
-                ip->i_inode.i_ctime = CURRENT_TIME_SEC;
+                ip->i_inode.i_ctime = CURRENT_TIME;
                gfs2_trans_add_bh(ip->i_gl, dibh, 1);
                gfs2_dinode_out(ip, dibh->b_data);
                brelse(dibh);
@@ -768,8 +767,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
        if (error)
                goto out_end_trans;
-        error = gfs2_dir_add(ndir, &ndentry->d_name, &ip->i_num,
+        error = gfs2_dir_add(ndir, &ndentry->d_name, ip, IF2DT(ip->i_inode.i_mode));
-                             IF2DT(ip->i_inode.i_mode));
        if (error)
                goto out_end_trans;
@@ -905,8 +903,8 @@ static int setattr_size(struct inode *inode, struct iattr *attr)
        }
        error = gfs2_truncatei(ip, attr->ia_size);
-        if (error)
+        if (error && (inode->i_size != ip->i_di.di_size))
-                return error;
+                i_size_write(inode, ip->i_di.di_size);
        return error;
 }
diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c
index 485ce3d49923..603d940f1159 100644
--- a/fs/gfs2/ops_super.c
+++ b/fs/gfs2/ops_super.c
@@ -326,8 +326,10 @@ static void gfs2_clear_inode(struct inode *inode)
                gfs2_glock_schedule_for_reclaim(ip->i_gl);
                gfs2_glock_put(ip->i_gl);
                ip->i_gl = NULL;
-                if (ip->i_iopen_gh.gh_gl)
+                if (ip->i_iopen_gh.gh_gl) {
+                        ip->i_iopen_gh.gh_gl->gl_object = NULL;
                        gfs2_glock_dq_uninit(&ip->i_iopen_gh);
+                }
        }
 }
@@ -422,13 +424,13 @@ static void gfs2_delete_inode(struct inode *inode)
        if (!inode->i_private)
                goto out;
-        error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB, &gh);
+        error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
        if (unlikely(error)) {
                gfs2_glock_dq_uninit(&ip->i_iopen_gh);
                goto out;
        }
-        gfs2_glock_dq(&ip->i_iopen_gh);
+        gfs2_glock_dq_wait(&ip->i_iopen_gh);
        gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, &ip->i_iopen_gh);
        error = gfs2_glock_nq(&ip->i_iopen_gh);
        if (error)
diff --git a/fs/gfs2/ops_vm.c b/fs/gfs2/ops_vm.c
index aa0dbd2aac1b..404b7cc9f8c4 100644
--- a/fs/gfs2/ops_vm.c
+++ b/fs/gfs2/ops_vm.c
@@ -66,7 +66,7 @@ static int alloc_page_backing(struct gfs2_inode *ip, struct page *page)
        if (error)
                goto out_gunlock_q;
-        error = gfs2_trans_begin(sdp, al->al_rgd->rd_ri.ri_length +
+        error = gfs2_trans_begin(sdp, al->al_rgd->rd_length +
                                 ind_blocks + RES_DINODE +
                                 RES_STATFS + RES_QUOTA, 0);
        if (error)
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index c186857e48a8..6e546ee8f3d4 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -66,6 +66,18 @@
 #define QUOTA_USER 1
 #define QUOTA_GROUP 0
+struct gfs2_quota_host {
+        u64 qu_limit;
+        u64 qu_warn;
+        s64 qu_value;
+};
+struct gfs2_quota_change_host {
+        u64 qc_change;
+        u32 qc_flags; /* GFS2_QCF_... */
+        u32 qc_id;
+};
 static u64 qd2offset(struct gfs2_quota_data *qd)
 {
        u64 offset;
@@ -561,6 +573,25 @@ static void do_qc(struct gfs2_quota_data *qd, s64 change)
        mutex_unlock(&sdp->sd_quota_mutex);
 }
+static void gfs2_quota_in(struct gfs2_quota_host *qu, const void *buf)
+{
+        const struct gfs2_quota *str = buf;
+        qu->qu_limit = be64_to_cpu(str->qu_limit);
+        qu->qu_warn = be64_to_cpu(str->qu_warn);
+        qu->qu_value = be64_to_cpu(str->qu_value);
+}
+static void gfs2_quota_out(const struct gfs2_quota_host *qu, void *buf)
+{
+        struct gfs2_quota *str = buf;
+        str->qu_limit = cpu_to_be64(qu->qu_limit);
+        str->qu_warn = cpu_to_be64(qu->qu_warn);
+        str->qu_value = cpu_to_be64(qu->qu_value);
+        memset(&str->qu_reserved, 0, sizeof(str->qu_reserved));
+}
 /**
 * gfs2_adjust_quota
 *
@@ -573,12 +604,13 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
        struct inode *inode = &ip->i_inode;
        struct address_space *mapping = inode->i_mapping;
        unsigned long index = loc >> PAGE_CACHE_SHIFT;
-        unsigned offset = loc & (PAGE_CACHE_SHIFT - 1);
+        unsigned offset = loc & (PAGE_CACHE_SIZE - 1);
        unsigned blocksize, iblock, pos;
        struct buffer_head *bh;
        struct page *page;
        void *kaddr;
-        __be64 *ptr;
+        char *ptr;
+        struct gfs2_quota_host qp;
        s64 value;
        int err = -EIO;
@@ -620,13 +652,17 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
        kaddr = kmap_atomic(page, KM_USER0);
        ptr = kaddr + offset;
-        value = (s64)be64_to_cpu(*ptr) + change;
+        gfs2_quota_in(&qp, ptr);
-        *ptr = cpu_to_be64(value);
+        qp.qu_value += change;
+        value = qp.qu_value;
+        gfs2_quota_out(&qp, ptr);
        flush_dcache_page(page);
        kunmap_atomic(kaddr, KM_USER0);
        err = 0;
        qd->qd_qb.qb_magic = cpu_to_be32(GFS2_MAGIC);
        qd->qd_qb.qb_value = cpu_to_be64(value);
+        ((struct gfs2_quota_lvb*)(qd->qd_gl->gl_lvb))->qb_magic = cpu_to_be32(GFS2_MAGIC);
+        ((struct gfs2_quota_lvb*)(qd->qd_gl->gl_lvb))->qb_value = cpu_to_be64(value);
 unlock:
        unlock_page(page);
        page_cache_release(page);
@@ -689,7 +725,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
                        goto out_alloc;
                error = gfs2_trans_begin(sdp,
-                                         al->al_rgd->rd_ri.ri_length +
+                                         al->al_rgd->rd_length +
                                         num_qd * data_blocks +
                                         nalloc * ind_blocks +
                                         RES_DINODE + num_qd +
@@ -709,7 +745,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
                offset = qd2offset(qd);
                error = gfs2_adjust_quota(ip, offset, qd->qd_change_sync,
                                          (struct gfs2_quota_data *)
-                                          qd->qd_gl->gl_lvb);
+                                          qd);
                if (error)
                        goto out_end_trans;
@@ -1050,6 +1086,15 @@ int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id)
        return error;
 }
+static void gfs2_quota_change_in(struct gfs2_quota_change_host *qc, const void *buf)
+{
+        const struct gfs2_quota_change *str = buf;
+        qc->qc_change = be64_to_cpu(str->qc_change);
+        qc->qc_flags = be32_to_cpu(str->qc_flags);
+        qc->qc_id = be32_to_cpu(str->qc_id);
+}
 int gfs2_quota_init(struct gfs2_sbd *sdp)
 {
        struct gfs2_inode *ip = GFS2_I(sdp->sd_qc_inode);
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index 8bc182c7e2ef..5ada38c99a2c 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -116,6 +116,22 @@ void gfs2_revoke_clean(struct gfs2_sbd *sdp)
        }
 }
+static int gfs2_log_header_in(struct gfs2_log_header_host *lh, const void *buf)
+{
+        const struct gfs2_log_header *str = buf;
+        if (str->lh_header.mh_magic != cpu_to_be32(GFS2_MAGIC) ||
+            str->lh_header.mh_type != cpu_to_be32(GFS2_METATYPE_LH))
+                return 1;
+        lh->lh_sequence = be64_to_cpu(str->lh_sequence);
+        lh->lh_flags = be32_to_cpu(str->lh_flags);
+        lh->lh_tail = be32_to_cpu(str->lh_tail);
+        lh->lh_blkno = be32_to_cpu(str->lh_blkno);
+        lh->lh_hash = be32_to_cpu(str->lh_hash);
+        return 0;
+}
 /**
 * get_log_header - read the log header for a given segment
 * @jd: the journal
@@ -147,12 +163,10 @@ static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk,
                                             sizeof(u32));
        hash = crc32_le(hash, (unsigned char const *)&nothing, sizeof(nothing));
        hash ^= (u32)~0;
-        gfs2_log_header_in(&lh, bh->b_data);
+        error = gfs2_log_header_in(&lh, bh->b_data);
        brelse(bh);
-        if (lh.lh_header.mh_magic != GFS2_MAGIC ||
+        if (error || lh.lh_blkno != blk || lh.lh_hash != hash)
-            lh.lh_header.mh_type != GFS2_METATYPE_LH ||
-            lh.lh_blkno != blk || lh.lh_hash != hash)
                return 1;
        *head = lh;
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 1727f5012efe..e4e040625153 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1,6 +1,6 @@
 /*
 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
- * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
+ * Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
@@ -28,6 +28,7 @@
 #include "ops_file.h"
 #include "util.h"
 #include "log.h"
+#include "inode.h"
 #define BFITNOENT ((u32)~0)
@@ -50,6 +51,9 @@ static const char valid_change[16] = {
                1, 0, 0, 0
 };
+static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
+                        unsigned char old_state, unsigned char new_state);
 /**
 * gfs2_setbit - Set a bit in the bitmaps
 * @buffer: the buffer that holds the bitmaps
@@ -204,7 +208,7 @@ void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd)
 {
        struct gfs2_sbd *sdp = rgd->rd_sbd;
        struct gfs2_bitmap *bi = NULL;
-        u32 length = rgd->rd_ri.ri_length;
+        u32 length = rgd->rd_length;
        u32 count[4], tmp;
        int buf, x;
@@ -227,7 +231,7 @@ void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd)
                return;
        }
-        tmp = rgd->rd_ri.ri_data -
+        tmp = rgd->rd_data -
                rgd->rd_rg.rg_free -
                rgd->rd_rg.rg_dinodes;
        if (count[1] + count[2] != tmp) {
@@ -253,10 +257,10 @@ void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd)
 }
-static inline int rgrp_contains_block(struct gfs2_rindex_host *ri, u64 block)
+static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block)
 {
-        u64 first = ri->ri_data0;
+        u64 first = rgd->rd_data0;
-        u64 last = first + ri->ri_data;
+        u64 last = first + rgd->rd_data;
        return first <= block && block < last;
 }
@@ -275,7 +279,7 @@ struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk)
        spin_lock(&sdp->sd_rindex_spin);
        list_for_each_entry(rgd, &sdp->sd_rindex_mru_list, rd_list_mru) {
-                if (rgrp_contains_block(&rgd->rd_ri, blk)) {
+                if (rgrp_contains_block(rgd, blk)) {
                        list_move(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list);
                        spin_unlock(&sdp->sd_rindex_spin);
                        return rgd;
@@ -354,6 +358,15 @@ void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
        mutex_unlock(&sdp->sd_rindex_mutex);
 }
+static void gfs2_rindex_print(const struct gfs2_rgrpd *rgd)
+{
+        printk(KERN_INFO "  ri_addr = %llu\n", (unsigned long long)rgd->rd_addr);
+        printk(KERN_INFO "  ri_length = %u\n", rgd->rd_length);
+        printk(KERN_INFO "  ri_data0 = %llu\n", (unsigned long long)rgd->rd_data0);
+        printk(KERN_INFO "  ri_data = %u\n", rgd->rd_data);
+        printk(KERN_INFO "  ri_bitbytes = %u\n", rgd->rd_bitbytes);
+}
 /**
 * gfs2_compute_bitstructs - Compute the bitmap sizes
 * @rgd: The resource group descriptor
@@ -367,7 +380,7 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd)
 {
        struct gfs2_sbd *sdp = rgd->rd_sbd;
        struct gfs2_bitmap *bi;
-        u32 length = rgd->rd_ri.ri_length; /* # blocks in hdr & bitmap */
+        u32 length = rgd->rd_length; /* # blocks in hdr & bitmap */
        u32 bytes_left, bytes;
        int x;
@@ -378,7 +391,7 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd)
        if (!rgd->rd_bits)
                return -ENOMEM;
-        bytes_left = rgd->rd_ri.ri_bitbytes;
+        bytes_left = rgd->rd_bitbytes;
        for (x = 0; x < length; x++) {
                bi = rgd->rd_bits + x;
@@ -399,14 +412,14 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd)
                } else if (x + 1 == length) {
                        bytes = bytes_left;
                        bi->bi_offset = sizeof(struct gfs2_meta_header);
-                        bi->bi_start = rgd->rd_ri.ri_bitbytes - bytes_left;
+                        bi->bi_start = rgd->rd_bitbytes - bytes_left;
                        bi->bi_len = bytes;
                /* other blocks */
                } else {
                        bytes = sdp->sd_sb.sb_bsize -
                                sizeof(struct gfs2_meta_header);
                        bi->bi_offset = sizeof(struct gfs2_meta_header);
-                        bi->bi_start = rgd->rd_ri.ri_bitbytes - bytes_left;
+                        bi->bi_start = rgd->rd_bitbytes - bytes_left;
                        bi->bi_len = bytes;
                }
@@ -418,9 +431,9 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd)
                return -EIO;
        }
        bi = rgd->rd_bits + (length - 1);
-        if ((bi->bi_start + bi->bi_len) * GFS2_NBBY != rgd->rd_ri.ri_data) {
+        if ((bi->bi_start + bi->bi_len) * GFS2_NBBY != rgd->rd_data) {
                if (gfs2_consist_rgrpd(rgd)) {
-                        gfs2_rindex_print(&rgd->rd_ri);
+                        gfs2_rindex_print(rgd);
                        fs_err(sdp, "start=%u len=%u offset=%u\n",
                               bi->bi_start, bi->bi_len, bi->bi_offset);
                }
@@ -431,9 +444,104 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd)
 }
 /**
- * gfs2_ri_update - Pull in a new resource index from the disk
+ * gfs2_ri_total - Total up the file system space, according to the rindex.
+ * @sdp: The GFS2 superblock whose rindex inode (sd_rindex) is read
+ *
+ * Walks the rindex file one struct gfs2_rindex at a time while holding
+ * sd_rindex_mutex and sums the ri_data field of every complete entry.
+ * A trailing partial entry is ignored, and a short or failed read ends
+ * the walk early with whatever has been accumulated so far.
+ *
+ * Returns: the sum of ri_data over the entries that were read
+ */
+u64 gfs2_ri_total(struct gfs2_sbd *sdp)
+{
+        u64 total_data = 0;
+        struct inode *inode = sdp->sd_rindex;
+        struct gfs2_inode *ip = GFS2_I(inode);
+        char buf[sizeof(struct gfs2_rindex)];
+        struct file_ra_state ra_state;
+        int error, rgrps;
+
+        mutex_lock(&sdp->sd_rindex_mutex);
+        file_ra_state_init(&ra_state, inode->i_mapping);
+        for (rgrps = 0;; rgrps++) {
+                loff_t pos = rgrps * sizeof(struct gfs2_rindex);
+
+                /*
+                 * Stop only when the entry would run past the end of the
+                 * file.  An entry that ends exactly at di_size is complete
+                 * and must be counted; testing with ">=" dropped the last
+                 * resource group from the total.
+                 */
+                if (pos + sizeof(struct gfs2_rindex) > ip->i_di.di_size)
+                        break;
+                error = gfs2_internal_read(ip, &ra_state, buf, &pos,
+                                           sizeof(struct gfs2_rindex));
+                /* Anything other than a full entry terminates the walk. */
+                if (error != sizeof(struct gfs2_rindex))
+                        break;
+                total_data += be32_to_cpu(((struct gfs2_rindex *)buf)->ri_data);
+        }
+        mutex_unlock(&sdp->sd_rindex_mutex);
+        return total_data;
+}
+/**
+ * gfs2_rindex_in - Load a raw rindex entry into a resource group descriptor
+ * @rgd: The resource group descriptor to fill in
+ * @buf: Buffer interpreted as a big-endian struct gfs2_rindex
+ *
+ * Each field is converted from big-endian to CPU byte order.
+ */
+static void gfs2_rindex_in(struct gfs2_rgrpd *rgd, const void *buf)
+{
+        const struct gfs2_rindex *ri = buf;
+
+        /* 64-bit fields */
+        rgd->rd_addr = be64_to_cpu(ri->ri_addr);
+        rgd->rd_data0 = be64_to_cpu(ri->ri_data0);
+
+        /* 32-bit fields */
+        rgd->rd_length = be32_to_cpu(ri->ri_length);
+        rgd->rd_data = be32_to_cpu(ri->ri_data);
+        rgd->rd_bitbytes = be32_to_cpu(ri->ri_bitbytes);
+}
+/**
+ * read_rindex_entry - Pull in a new resource index entry from the disk
 * @gl: The glock covering the rindex inode
 *
+ * Returns: 0 on success, error code otherwise
+ */
+static int read_rindex_entry(struct gfs2_inode *ip,
+                             struct file_ra_state *ra_state)
+{
+        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
+        /* The entry to read is selected by the current sd_rgrps count. */
+        loff_t offset = sdp->sd_rgrps * sizeof(struct gfs2_rindex);
+        char raw[sizeof(struct gfs2_rindex)];
+        struct gfs2_rgrpd *rgd;
+        int ret;
+
+        ret = gfs2_internal_read(ip, ra_state, raw, &offset,
+                                 sizeof(struct gfs2_rindex));
+        /* A zero-length read is reported as success with nothing added. */
+        if (ret == 0)
+                return 0;
+        /* A short positive read becomes -EIO; negative errors pass through. */
+        if (ret != sizeof(struct gfs2_rindex))
+                return (ret > 0) ? -EIO : ret;
+
+        rgd = kzalloc(sizeof(struct gfs2_rgrpd), GFP_NOFS);
+        if (!rgd)
+                return -ENOMEM;
+
+        mutex_init(&rgd->rd_mutex);
+        lops_init_le(&rgd->rd_le, &gfs2_rg_lops);
+        rgd->rd_sbd = sdp;
+
+        /*
+         * The descriptor is linked onto both superblock lists before the
+         * fallible steps below, so it is not freed here on error.
+         * NOTE(review): presumably the callers' clear_rgrpdi() releases it;
+         * confirm against that helper.
+         */
+        list_add_tail(&rgd->rd_list, &sdp->sd_rindex_list);
+        list_add_tail(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list);
+
+        gfs2_rindex_in(rgd, raw);
+
+        ret = compute_bitstructs(rgd);
+        if (ret)
+                return ret;
+
+        ret = gfs2_glock_get(sdp, rgd->rd_addr,
+                             &gfs2_rgrp_glops, CREATE, &rgd->rd_gl);
+        if (ret)
+                return ret;
+
+        rgd->rd_gl->gl_object = rgd;
+        rgd->rd_rg_vn = rgd->rd_gl->gl_vn - 1;
+        rgd->rd_flags |= GFS2_RDF_CHECK;
+        return 0;
+}
+/**
+ * gfs2_ri_update - Pull in a new resource index from the disk
+ * @ip: pointer to the rindex inode
+ *
 * Returns: 0 on successful update, error code otherwise
 */
@@ -441,13 +549,11 @@ static int gfs2_ri_update(struct gfs2_inode *ip)
 {
        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
        struct inode *inode = &ip->i_inode;
-        struct gfs2_rgrpd *rgd;
-        char buf[sizeof(struct gfs2_rindex)];
        struct file_ra_state ra_state;
-        u64 junk = ip->i_di.di_size;
+        u64 rgrp_count = ip->i_di.di_size;
        int error;
-        if (do_div(junk, sizeof(struct gfs2_rindex))) {
+        if (do_div(rgrp_count, sizeof(struct gfs2_rindex))) {
                gfs2_consist_inode(ip);
                return -EIO;
        }
@@ -455,50 +561,50 @@ static int gfs2_ri_update(struct gfs2_inode *ip)
        clear_rgrpdi(sdp);
        file_ra_state_init(&ra_state, inode->i_mapping);
-        for (sdp->sd_rgrps = 0;; sdp->sd_rgrps++) {
+        for (sdp->sd_rgrps = 0; sdp->sd_rgrps < rgrp_count; sdp->sd_rgrps++) {
-                loff_t pos = sdp->sd_rgrps * sizeof(struct gfs2_rindex);
+                error = read_rindex_entry(ip, &ra_state);
-                error = gfs2_internal_read(ip, &ra_state, buf, &pos,
+                if (error) {
-                                            sizeof(struct gfs2_rindex));
+                        clear_rgrpdi(sdp);
-                if (!error)
+                        return error;
-                        break;
-                if (error != sizeof(struct gfs2_rindex)) {
-                        if (error > 0)
-                                error = -EIO;
-                        goto fail;
                }
+        }
-                rgd = kzalloc(sizeof(struct gfs2_rgrpd), GFP_NOFS);
+        sdp->sd_rindex_vn = ip->i_gl->gl_vn;
-                error = -ENOMEM;
+        return 0;
-                if (!rgd)
+}
-                        goto fail;
-                mutex_init(&rgd->rd_mutex);
-                lops_init_le(&rgd->rd_le, &gfs2_rg_lops);
-                rgd->rd_sbd = sdp;
-                list_add_tail(&rgd->rd_list, &sdp->sd_rindex_list);
-                list_add_tail(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list);
-                gfs2_rindex_in(&rgd->rd_ri, buf);
-                error = compute_bitstructs(rgd);
-                if (error)
-                        goto fail;
-                error = gfs2_glock_get(sdp, rgd->rd_ri.ri_addr,
+/**
-                                       &gfs2_rgrp_glops, CREATE, &rgd->rd_gl);
+ * gfs2_ri_update_special - Pull in a new resource index from the disk
-                if (error)
+ *
-                        goto fail;
+ * This is a special version that's safe to call from gfs2_inplace_reserve_i.
+ * In this case we know that we don't have any resource groups in memory yet.
+ *
+ * @ip: pointer to the rindex inode
+ *
+ * Returns: 0 on successful update, error code otherwise
+ */
+static int gfs2_ri_update_special(struct gfs2_inode *ip)
+{
+        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
+        struct inode *inode = &ip->i_inode;
+        struct file_ra_state ra_state;
+        int error;
-                rgd->rd_gl->gl_object = rgd;
+        file_ra_state_init(&ra_state, inode->i_mapping);
-                rgd->rd_rg_vn = rgd->rd_gl->gl_vn - 1;
+        for (sdp->sd_rgrps = 0;; sdp->sd_rgrps++) {
+                /* Ignore partials */
+                if ((sdp->sd_rgrps + 1) * sizeof(struct gfs2_rindex) >
+                    ip->i_di.di_size)
+                        break;
+                error = read_rindex_entry(ip, &ra_state);
+                if (error) {
+                        clear_rgrpdi(sdp);
+                        return error;
+                }
        }
        sdp->sd_rindex_vn = ip->i_gl->gl_vn;
        return 0;
-fail:
-        clear_rgrpdi(sdp);
-        return error;
 }
 /**
@@ -543,6 +649,28 @@ int gfs2_rindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ri_gh)
        return error;
 }
+/**
+ * gfs2_rgrp_in - Load a raw rgrp header into its host-order form
+ * @rg: The host structure to fill in
+ * @buf: Buffer interpreted as a big-endian struct gfs2_rgrp
+ *
+ * Only the flags, free count, dinode count and inode generation are read.
+ */
+static void gfs2_rgrp_in(struct gfs2_rgrp_host *rg, const void *buf)
+{
+        const struct gfs2_rgrp *disk = buf;
+
+        /* The generation is the only 64-bit field; the rest are 32-bit. */
+        rg->rg_igeneration = be64_to_cpu(disk->rg_igeneration);
+        rg->rg_flags = be32_to_cpu(disk->rg_flags);
+        rg->rg_free = be32_to_cpu(disk->rg_free);
+        rg->rg_dinodes = be32_to_cpu(disk->rg_dinodes);
+}
+/**
+ * gfs2_rgrp_out - Store host-order rgrp values into a raw rgrp header
+ * @rg: The host-order values to store
+ * @buf: Destination buffer, written as a struct gfs2_rgrp
+ *
+ * Values are stored big-endian; the padding word and the reserved area
+ * are zeroed.
+ */
+static void gfs2_rgrp_out(const struct gfs2_rgrp_host *rg, void *buf)
+{
+        struct gfs2_rgrp *disk = buf;
+
+        disk->rg_flags = cpu_to_be32(rg->rg_flags);
+        disk->rg_free = cpu_to_be32(rg->rg_free);
+        disk->rg_dinodes = cpu_to_be32(rg->rg_dinodes);
+        disk->rg_igeneration = cpu_to_be64(rg->rg_igeneration);
+
+        /* Fields with no host counterpart are cleared. */
+        disk->__pad = cpu_to_be32(0);
+        memset(&disk->rg_reserved, 0, sizeof(disk->rg_reserved));
+}
 /**
 * gfs2_rgrp_bh_get - Read in a RG's header and bitmaps
 * @rgd: the struct gfs2_rgrpd describing the RG to read in
@@ -557,7 +685,7 @@ int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
 {
        struct gfs2_sbd *sdp = rgd->rd_sbd;
        struct gfs2_glock *gl = rgd->rd_gl;
-        unsigned int length = rgd->rd_ri.ri_length;
+        unsigned int length = rgd->rd_length;
        struct gfs2_bitmap *bi;
        unsigned int x, y;
        int error;
@@ -575,7 +703,7 @@ int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
        for (x = 0; x < length; x++) {
                bi = rgd->rd_bits + x;
-                error = gfs2_meta_read(gl, rgd->rd_ri.ri_addr + x, 0, &bi->bi_bh);
+                error = gfs2_meta_read(gl, rgd->rd_addr + x, 0, &bi->bi_bh);
                if (error)
                        goto fail;
        }
@@ -637,7 +765,7 @@ void gfs2_rgrp_bh_hold(struct gfs2_rgrpd *rgd)
 void gfs2_rgrp_bh_put(struct gfs2_rgrpd *rgd)
 {
        struct gfs2_sbd *sdp = rgd->rd_sbd;
-        int x, length = rgd->rd_ri.ri_length;
+        int x, length = rgd->rd_length;
        spin_lock(&sdp->sd_rindex_spin);
        gfs2_assert_warn(rgd->rd_sbd, rgd->rd_bh_count);
@@ -660,7 +788,7 @@ void gfs2_rgrp_bh_put(struct gfs2_rgrpd *rgd)
 void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd)
 {
        struct gfs2_sbd *sdp = rgd->rd_sbd;
-        unsigned int length = rgd->rd_ri.ri_length;
+        unsigned int length = rgd->rd_length;
        unsigned int x;
        for (x = 0; x < length; x++) {
@@ -722,6 +850,38 @@ static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_alloc *al)
 }
 /**
+ * try_rgrp_unlink - Look for any unlinked, allocated, but unused inodes
+ * @rgd: The rgrp
+ * @last_unlinked: In/out block address; candidates at or below it are
+ *                 skipped, and it is advanced to each new candidate before
+ *                 the inode lookup is attempted
+ *
+ * When the search reports no further unlinked blocks, GFS2_RDF_CHECK is
+ * cleared on @rgd so that callers testing the flag stop rescanning it.
+ *
+ * Returns: The inode, if one has been found, otherwise NULL
+ */
+static struct inode *try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked)
+{
+        struct inode *inode;
+        u32 goal = 0;
+        u64 no_addr;
+
+        for (;;) {
+                goal = rgblk_search(rgd, goal, GFS2_BLKST_UNLINKED,
+                                    GFS2_BLKST_UNLINKED);
+                /*
+                 * rgblk_search() yields 0 when nothing matches.  Leave the
+                 * loop rather than returning, so the flag update below is
+                 * reached; the previous "return 0" made it dead code.
+                 */
+                if (goal == 0)
+                        break;
+                no_addr = goal + rgd->rd_data0;
+                /*
+                 * NOTE(review): goal is passed back unchanged after this
+                 * continue; confirm rgblk_search() cannot hand back the
+                 * same block again.
+                 */
+                if (no_addr <= *last_unlinked)
+                        continue;
+                *last_unlinked = no_addr;
+                inode = gfs2_inode_lookup(rgd->rd_sbd->sd_vfs, DT_UNKNOWN,
+                                        no_addr, -1);
+                /* A failed lookup is not fatal: keep searching. */
+                if (!IS_ERR(inode))
+                        return inode;
+        }
+
+        rgd->rd_flags &= ~GFS2_RDF_CHECK;
+        return NULL;
+}
+/**
 * recent_rgrp_first - get first RG from "recent" list
 * @sdp: The GFS2 superblock
 * @rglast: address of the rgrp used last
@@ -743,7 +903,7 @@ static struct gfs2_rgrpd *recent_rgrp_first(struct gfs2_sbd *sdp,
                goto first;
        list_for_each_entry(rgd, &sdp->sd_rindex_recent_list, rd_recent) {
-                if (rgd->rd_ri.ri_addr == rglast)
+                if (rgd->rd_addr == rglast)
                        goto out;
        }
@@ -882,8 +1042,9 @@ static void forward_rgrp_set(struct gfs2_sbd *sdp, struct gfs2_rgrpd *rgd)
 * Returns: errno
 */
-static int get_local_rgrp(struct gfs2_inode *ip)
+static struct inode *get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
 {
+        struct inode *inode = NULL;
        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
        struct gfs2_rgrpd *rgd, *begin = NULL;
        struct gfs2_alloc *al = &ip->i_alloc;
@@ -903,7 +1064,11 @@ static int get_local_rgrp(struct gfs2_inode *ip)
                case 0:
                        if (try_rgrp_fit(rgd, al))
                                goto out;
+                        if (rgd->rd_flags & GFS2_RDF_CHECK)
+                                inode = try_rgrp_unlink(rgd, last_unlinked);
                        gfs2_glock_dq_uninit(&al->al_rgd_gh);
+                        if (inode)
+                                return inode;
                        rgd = recent_rgrp_next(rgd, 1);
                        break;
@@ -912,7 +1077,7 @@ static int get_local_rgrp(struct gfs2_inode *ip)
                        break;
                default:
-                        return error;
+                        return ERR_PTR(error);
                }
        }
@@ -927,7 +1092,11 @@ static int get_local_rgrp(struct gfs2_inode *ip)
                case 0:
                        if (try_rgrp_fit(rgd, al))
                                goto out;
+                        if (rgd->rd_flags & GFS2_RDF_CHECK)
+                                inode = try_rgrp_unlink(rgd, last_unlinked);
                        gfs2_glock_dq_uninit(&al->al_rgd_gh);
+                        if (inode)
+                                return inode;
                        break;
                case GLR_TRYFAILED:
@@ -935,7 +1104,7 @@ static int get_local_rgrp(struct gfs2_inode *ip)
                        break;
                default:
-                        return error;
+                        return ERR_PTR(error);
                }
                rgd = gfs2_rgrpd_get_next(rgd);
@@ -944,7 +1113,7 @@ static int get_local_rgrp(struct gfs2_inode *ip)
                if (rgd == begin) {
                        if (++loops >= 3)
-                                return -ENOSPC;
+                                return ERR_PTR(-ENOSPC);
                        if (!skipped)
                                loops++;
                        flags = 0;
@@ -954,7 +1123,7 @@ static int get_local_rgrp(struct gfs2_inode *ip)
        }
 out:
-        ip->i_last_rg_alloc = rgd->rd_ri.ri_addr;
+        ip->i_last_rg_alloc = rgd->rd_addr;
        if (begin) {
                recent_rgrp_add(rgd);
@@ -964,7 +1133,7 @@ out:
                forward_rgrp_set(sdp, rgd);
        }
-        return 0;
+        return NULL;
 }
 /**
@@ -978,19 +1147,33 @@ int gfs2_inplace_reserve_i(struct gfs2_inode *ip, char *file, unsigned int line)
 {
        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
        struct gfs2_alloc *al = &ip->i_alloc;
-        int error;
+        struct inode *inode;
+        int error = 0;
+        u64 last_unlinked = 0;
        if (gfs2_assert_warn(sdp, al->al_requested))
                return -EINVAL;
-        error = gfs2_rindex_hold(sdp, &al->al_ri_gh);
+try_again:
+        /* We need to hold the rindex unless the inode we're using is
+           the rindex itself, in which case it's already held. */
+        if (ip != GFS2_I(sdp->sd_rindex))
+                error = gfs2_rindex_hold(sdp, &al->al_ri_gh);
+        else if (!sdp->sd_rgrps) /* We may not have the rindex read in, so: */
+                error = gfs2_ri_update_special(ip);
        if (error)
                return error;
-        error = get_local_rgrp(ip);
+        inode = get_local_rgrp(ip, &last_unlinked);
-        if (error) {
+        if (inode) {
-                gfs2_glock_dq_uninit(&al->al_ri_gh);
+                if (ip != GFS2_I(sdp->sd_rindex))
-                return error;
+                        gfs2_glock_dq_uninit(&al->al_ri_gh);
+                if (IS_ERR(inode))
+                        return PTR_ERR(inode);
+                iput(inode);
+                gfs2_log_flush(sdp, NULL);
+                goto try_again;
        }
        al->al_file = file;
@@ -1019,7 +1202,8 @@ void gfs2_inplace_release(struct gfs2_inode *ip)
        al->al_rgd = NULL;
        gfs2_glock_dq_uninit(&al->al_rgd_gh);
-        gfs2_glock_dq_uninit(&al->al_ri_gh);
+        if (ip != GFS2_I(sdp->sd_rindex))
+                gfs2_glock_dq_uninit(&al->al_ri_gh);
 }
 /**
@@ -1037,8 +1221,8 @@ unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block)
        unsigned int buf;
        unsigned char type;
-        length = rgd->rd_ri.ri_length;
+        length = rgd->rd_length;
-        rgrp_block = block - rgd->rd_ri.ri_data0;
+        rgrp_block = block - rgd->rd_data0;
        for (buf = 0; buf < length; buf++) {
                bi = rgd->rd_bits + buf;
@@ -1077,10 +1261,10 @@ unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block)
 */
 static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
-                             unsigned char old_state, unsigned char new_state)
+                        unsigned char old_state, unsigned char new_state)
 {
        struct gfs2_bitmap *bi = NULL;
-        u32 length = rgd->rd_ri.ri_length;
+        u32 length = rgd->rd_length;
        u32 blk = 0;
        unsigned int buf, x;
@@ -1118,17 +1302,18 @@ static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
                goal = 0;
        }
-        if (gfs2_assert_withdraw(rgd->rd_sbd, x <= length))
+        if (old_state != new_state) {
-                blk = 0;
+                gfs2_assert_withdraw(rgd->rd_sbd, blk != BFITNOENT);
-        gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
+                gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
-        gfs2_setbit(rgd, bi->bi_bh->b_data + bi->bi_offset,
+                gfs2_setbit(rgd, bi->bi_bh->b_data + bi->bi_offset,
-                    bi->bi_len, blk, new_state);
-        if (bi->bi_clone)
-                gfs2_setbit(rgd, bi->bi_clone + bi->bi_offset,
                            bi->bi_len, blk, new_state);
+                if (bi->bi_clone)
+                        gfs2_setbit(rgd, bi->bi_clone + bi->bi_offset,
+                                    bi->bi_len, blk, new_state);
+        }
-        return bi->bi_start * GFS2_NBBY + blk;
+        return (blk == BFITNOENT) ? 0 : (bi->bi_start * GFS2_NBBY) + blk;
 }
 /**
@@ -1156,9 +1341,9 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
                return NULL;
        }
-        length = rgd->rd_ri.ri_length;
+        length = rgd->rd_length;
-        rgrp_blk = bstart - rgd->rd_ri.ri_data0;
+        rgrp_blk = bstart - rgd->rd_data0;
        while (blen--) {
                for (buf = 0; buf < length; buf++) {
@@ -1202,15 +1387,15 @@ u64 gfs2_alloc_data(struct gfs2_inode *ip)
        u32 goal, blk;
        u64 block;
-        if (rgrp_contains_block(&rgd->rd_ri, ip->i_di.di_goal_data))
+        if (rgrp_contains_block(rgd, ip->i_di.di_goal_data))
-                goal = ip->i_di.di_goal_data - rgd->rd_ri.ri_data0;
+                goal = ip->i_di.di_goal_data - rgd->rd_data0;
        else
                goal = rgd->rd_last_alloc_data;
        blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, GFS2_BLKST_USED);
        rgd->rd_last_alloc_data = blk;
-        block = rgd->rd_ri.ri_data0 + blk;
+        block = rgd->rd_data0 + blk;
        ip->i_di.di_goal_data = block;
        gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free);
@@ -1246,15 +1431,15 @@ u64 gfs2_alloc_meta(struct gfs2_inode *ip)
        u32 goal, blk;
        u64 block;
-        if (rgrp_contains_block(&rgd->rd_ri, ip->i_di.di_goal_meta))
+        if (rgrp_contains_block(rgd, ip->i_di.di_goal_meta))
-                goal = ip->i_di.di_goal_meta - rgd->rd_ri.ri_data0;
+                goal = ip->i_di.di_goal_meta - rgd->rd_data0;
        else
                goal = rgd->rd_last_alloc_meta;
        blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, GFS2_BLKST_USED);
        rgd->rd_last_alloc_meta = blk;
-        block = rgd->rd_ri.ri_data0 + blk;
+        block = rgd->rd_data0 + blk;
        ip->i_di.di_goal_meta = block;
        gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free);
@@ -1296,7 +1481,7 @@ u64 gfs2_alloc_di(struct gfs2_inode *dip, u64 *generation)
        rgd->rd_last_alloc_meta = blk;
-        block = rgd->rd_ri.ri_data0 + blk;
+        block = rgd->rd_data0 + blk;
        gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free);
        rgd->rd_rg.rg_free--;
@@ -1379,7 +1564,7 @@ void gfs2_unlink_di(struct inode *inode)
        struct gfs2_inode *ip = GFS2_I(inode);
        struct gfs2_sbd *sdp = GFS2_SB(inode);
        struct gfs2_rgrpd *rgd;
-        u64 blkno = ip->i_num.no_addr;
+        u64 blkno = ip->i_no_addr;
        rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_UNLINKED);
        if (!rgd)
@@ -1414,9 +1599,9 @@ static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno)
 void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
 {
-        gfs2_free_uninit_di(rgd, ip->i_num.no_addr);
+        gfs2_free_uninit_di(rgd, ip->i_no_addr);
        gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid);
-        gfs2_meta_wipe(ip, ip->i_num.no_addr, 1);
+        gfs2_meta_wipe(ip, ip->i_no_addr, 1);
 }
 /**
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index b01e0cfc99b5..b4c6adfc6f2e 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -65,5 +65,6 @@ void gfs2_rlist_add(struct gfs2_sbd *sdp, struct gfs2_rgrp_list *rlist,
 void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state,
                      int flags);
 void gfs2_rlist_free(struct gfs2_rgrp_list *rlist);
+u64 gfs2_ri_total(struct gfs2_sbd *sdp);
 #endif /* __RGRP_DOT_H__ */
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 4fdda974dc83..f916b9740c75 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -95,8 +95,8 @@ int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent)
 {
        unsigned int x;
-        if (sb->sb_header.mh_magic != GFS2_MAGIC ||
+        if (sb->sb_magic != GFS2_MAGIC ||
-            sb->sb_header.mh_type != GFS2_METATYPE_SB) {
+            sb->sb_type != GFS2_METATYPE_SB) {
                if (!silent)
                        printk(KERN_WARNING "GFS2: not a GFS2 filesystem\n");
                return -EINVAL;
@@ -174,10 +174,31 @@ static int end_bio_io_page(struct bio *bio, unsigned int bytes_done, int error)
        return 0;
 }
+/**
+ * gfs2_sb_in - Load a raw superblock into its host-order form
+ * @sb: The host superblock structure to fill in
+ * @buf: Buffer interpreted as a big-endian struct gfs2_sb
+ *
+ * Numeric fields are converted to CPU byte order; the lock protocol and
+ * lock table names are copied as GFS2_LOCKNAME_LEN raw bytes.
+ */
+static void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf)
+{
+        const struct gfs2_sb *disk = buf;
+
+        /* Metadata header fields */
+        sb->sb_magic = be32_to_cpu(disk->sb_header.mh_magic);
+        sb->sb_type = be32_to_cpu(disk->sb_header.mh_type);
+        sb->sb_format = be32_to_cpu(disk->sb_header.mh_format);
+
+        /* Format and block size fields */
+        sb->sb_fs_format = be32_to_cpu(disk->sb_fs_format);
+        sb->sb_multihost_format = be32_to_cpu(disk->sb_multihost_format);
+        sb->sb_bsize = be32_to_cpu(disk->sb_bsize);
+        sb->sb_bsize_shift = be32_to_cpu(disk->sb_bsize_shift);
+
+        /* Master and root directory inode numbers */
+        sb->sb_master_dir.no_formal_ino = be64_to_cpu(disk->sb_master_dir.no_formal_ino);
+        sb->sb_master_dir.no_addr = be64_to_cpu(disk->sb_master_dir.no_addr);
+        sb->sb_root_dir.no_formal_ino = be64_to_cpu(disk->sb_root_dir.no_formal_ino);
+        sb->sb_root_dir.no_addr = be64_to_cpu(disk->sb_root_dir.no_addr);
+
+        /* Byte strings need no conversion */
+        memcpy(sb->sb_lockproto, disk->sb_lockproto, GFS2_LOCKNAME_LEN);
+        memcpy(sb->sb_locktable, disk->sb_locktable, GFS2_LOCKNAME_LEN);
+}
 /**
 * gfs2_read_super - Read the gfs2 super block from disk
- * @sb: The VFS super block
+ * @sdp: The GFS2 super block
 * @sector: The location of the super block
+ * @error: The error code to return
 *
 * This uses the bio functions to read the super block from disk
 * because we want to be 100% sure that we never read cached data.
@@ -189,17 +210,19 @@ static int end_bio_io_page(struct bio *bio, unsigned int bytes_done, int error)
 * the master directory (contains pointers to journals etc) and the
 * root directory.
 *
- * Returns: A page containing the sb or NULL
+ * Returns: 0 on success or error
 */
-struct page *gfs2_read_super(struct super_block *sb, sector_t sector)
+int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector)
 {
+        struct super_block *sb = sdp->sd_vfs;
+        struct gfs2_sb *p;
        struct page *page;
        struct bio *bio;
        page = alloc_page(GFP_KERNEL);
        if (unlikely(!page))
-                return NULL;
+                return -ENOBUFS;
        ClearPageUptodate(page);
        ClearPageDirty(page);
@@ -208,7 +231,7 @@ struct page *gfs2_read_super(struct super_block *sb, sector_t sector)
        bio = bio_alloc(GFP_KERNEL, 1);
        if (unlikely(!bio)) {
                __free_page(page);
-                return NULL;
+                return -ENOBUFS;
        }
        bio->bi_sector = sector * (sb->s_blocksize >> 9);
@@ -222,9 +245,13 @@ struct page *gfs2_read_super(struct super_block *sb, sector_t sector)
        bio_put(bio);
        if (!PageUptodate(page)) {
                __free_page(page);
-                return NULL;
+                return -EIO;
        }
-        return page;
+        p = kmap(page);
+        gfs2_sb_in(&sdp->sd_sb, p);
+        kunmap(page);
+        __free_page(page);
+        return 0;
 }
 /**
@@ -241,19 +268,13 @@ int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent)
        u32 tmp_blocks;
        unsigned int x;
        int error;
-        struct page *page;
-        char *sb;
-        page = gfs2_read_super(sdp->sd_vfs, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift);
+        error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift);
-        if (!page) {
+        if (error) {
                if (!silent)
                        fs_err(sdp, "can't read superblock\n");
-                return -EIO;
+                return error;
        }
-        sb = kmap(page);
-        gfs2_sb_in(&sdp->sd_sb, sb);
-        kunmap(page);
-        __free_page(page);
        error = gfs2_check_sb(sdp, &sdp->sd_sb, silent);
        if (error)
@@ -360,7 +381,7 @@ int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh)
                name.len = sprintf(buf, "journal%u", sdp->sd_journals);
                name.hash = gfs2_disk_hash(name.name, name.len);
-                error = gfs2_dir_search(sdp->sd_jindex, &name, NULL, NULL);
+                error = gfs2_dir_check(sdp->sd_jindex, &name, NULL);
                if (error == -ENOENT) {
                        error = 0;
                        break;
@@ -593,6 +614,24 @@ int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
        return error;
 }
+/**
+ * gfs2_statfs_change_in - Load a raw statfs change record into host order
+ * @sc: The host structure to fill in
+ * @buf: Buffer interpreted as a big-endian struct gfs2_statfs_change
+ */
+static void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, const void *buf)
+{
+        const struct gfs2_statfs_change *disk = buf;
+
+        /* All three counters are 64-bit big-endian values. */
+        sc->sc_dinodes = be64_to_cpu(disk->sc_dinodes);
+        sc->sc_free = be64_to_cpu(disk->sc_free);
+        sc->sc_total = be64_to_cpu(disk->sc_total);
+}
+/**
+ * gfs2_statfs_change_out - Store a host statfs change record in raw form
+ * @sc: The host-order counters to store
+ * @buf: Destination buffer, written as a struct gfs2_statfs_change
+ */
+static void gfs2_statfs_change_out(const struct gfs2_statfs_change_host *sc, void *buf)
+{
+        struct gfs2_statfs_change *disk = buf;
+
+        /* All three counters are stored as 64-bit big-endian values. */
+        disk->sc_dinodes = cpu_to_be64(sc->sc_dinodes);
+        disk->sc_free = cpu_to_be64(sc->sc_free);
+        disk->sc_total = cpu_to_be64(sc->sc_total);
+}
 int gfs2_statfs_init(struct gfs2_sbd *sdp)
 {
        struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
@@ -772,7 +811,7 @@ static int statfs_slow_fill(struct gfs2_rgrpd *rgd,
                            struct gfs2_statfs_change_host *sc)
 {
        gfs2_rgrp_verify(rgd);
-        sc->sc_total += rgd->rd_ri.ri_data;
+        sc->sc_total += rgd->rd_data;
        sc->sc_free += rgd->rd_rg.rg_free;
        sc->sc_dinodes += rgd->rd_rg.rg_dinodes;
        return 0;
diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h
index e590b2df11dc..60a870e430be 100644
--- a/fs/gfs2/super.h
+++ b/fs/gfs2/super.h
@@ -16,7 +16,7 @@ void gfs2_tune_init(struct gfs2_tune *gt);
 int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent);
 int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent);
-struct page *gfs2_read_super(struct super_block *sb, sector_t sector);
+int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector);
 static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp)
 {
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
index 601eaa1b9ed6..424a0774eda8 100644
--- a/fs/gfs2/util.c
+++ b/fs/gfs2/util.c
@@ -115,8 +115,8 @@ int gfs2_consist_inode_i(struct gfs2_inode *ip, int cluster_wide,
                "GFS2: fsid=%s:   inode = %llu %llu\n"
                "GFS2: fsid=%s:   function = %s, file = %s, line = %u\n",
                sdp->sd_fsname,
-                sdp->sd_fsname, (unsigned long long)ip->i_num.no_formal_ino,
+                sdp->sd_fsname, (unsigned long long)ip->i_no_formal_ino,
-                (unsigned long long)ip->i_num.no_addr,
+                (unsigned long long)ip->i_no_addr,
                sdp->sd_fsname, function, file, line);
        return rv;
 }
@@ -137,7 +137,7 @@ int gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, int cluster_wide,
                "GFS2: fsid=%s:   RG = %llu\n"
                "GFS2: fsid=%s:   function = %s, file = %s, line = %u\n",
                sdp->sd_fsname,
-                sdp->sd_fsname, (unsigned long long)rgd->rd_ri.ri_addr,
+                sdp->sd_fsname, (unsigned long long)rgd->rd_addr,
                sdp->sd_fsname, function, file, line);
        return rv;
 }
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 9a934db0bd8a..bc835f272a6e 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -607,7 +607,7 @@ static const struct file_operations hfs_file_operations = {
        .write          = do_sync_write,
        .aio_write      = generic_file_aio_write,
        .mmap           = generic_file_mmap,
-        .sendfile       = generic_file_sendfile,
+        .splice_read    = generic_file_splice_read,
        .fsync          = file_fsync,
        .open           = hfs_file_open,
        .release        = hfs_file_release,
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 45dab5d6cc10..409ce5429c91 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -288,7 +288,7 @@ static const struct file_operations hfsplus_file_operations = {
        .write          = do_sync_write,
        .aio_write      = generic_file_aio_write,
        .mmap           = generic_file_mmap,
-        .sendfile       = generic_file_sendfile,
+        .splice_read    = generic_file_splice_read,
        .fsync          = file_fsync,
        .open           = hfsplus_file_open,
        .release        = hfsplus_file_release,
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 8286491dbf31..c77862032e84 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -390,7 +390,7 @@ int hostfs_fsync(struct file *file, struct dentry *dentry, int datasync)
 static const struct file_operations hostfs_file_fops = {
        .llseek         = generic_file_llseek,
        .read           = do_sync_read,
-        .sendfile       = generic_file_sendfile,
+        .splice_read    = generic_file_splice_read,
        .aio_read       = generic_file_aio_read,
        .aio_write      = generic_file_aio_write,
        .write          = do_sync_write,
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index b4eafc0f1e54..5b53e5c5d8df 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -129,7 +129,7 @@ const struct file_operations hpfs_file_ops =
        .mmap           = generic_file_mmap,
        .release        = hpfs_file_release,
        .fsync          = hpfs_file_fsync,
-        .sendfile       = generic_file_sendfile,
+        .splice_read    = generic_file_splice_read,
 };
 const struct inode_operations hpfs_file_iops =
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index 99871279a1ed..c2530197be0c 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -47,7 +47,7 @@ const struct file_operations jffs2_file_operations =
        .ioctl =        jffs2_ioctl,
        .mmap =         generic_file_readonly_mmap,
        .fsync =        jffs2_fsync,
-        .sendfile =     generic_file_sendfile
+        .splice_read =  generic_file_splice_read,
 };
 /* jffs2_file_inode_operations */
diff --git a/fs/jfs/endian24.h b/fs/jfs/endian24.h
index 79494c4f2b10..fa92f7f1d0d0 100644
--- a/fs/jfs/endian24.h
+++ b/fs/jfs/endian24.h
@@ -29,7 +29,7 @@
        __u32 __x = (x); \
        ((__u32)( \
                ((__x & (__u32)0x000000ffUL) << 16) | \
-                 (__x & (__u32)0x0000ff00UL)        | \
+                 (__x & (__u32)0x0000ff00UL)        | \
                ((__x & (__u32)0x00ff0000UL) >> 16) )); \
 })
diff --git a/fs/jfs/file.c b/fs/jfs/file.c
index f7f8eff19b7b..87eb93694af7 100644
--- a/fs/jfs/file.c
+++ b/fs/jfs/file.c
@@ -108,7 +108,6 @@ const struct file_operations jfs_file_operations = {
        .aio_read       = generic_file_aio_read,
        .aio_write      = generic_file_aio_write,
        .mmap           = generic_file_mmap,
-        .sendfile       = generic_file_sendfile,
        .splice_read    = generic_file_splice_read,
        .splice_write   = generic_file_splice_write,
        .fsync          = jfs_fsync,
diff --git a/fs/jfs/jfs_debug.c b/fs/jfs/jfs_debug.c
index 9c5d59632aac..887f5759e536 100644
--- a/fs/jfs/jfs_debug.c
+++ b/fs/jfs/jfs_debug.c
@@ -26,34 +26,6 @@
 #include "jfs_filsys.h"
 #include "jfs_debug.h"
-#ifdef CONFIG_JFS_DEBUG
-void dump_mem(char *label, void *data, int length)
-{
-        int i, j;
-        int *intptr = data;
-        char *charptr = data;
-        char buf[10], line[80];
-        printk("%s: dump of %d bytes of data at 0x%p\n\n", label, length,
-               data);
-        for (i = 0; i < length; i += 16) {
-                line[0] = 0;
-                for (j = 0; (j < 4) && (i + j * 4 < length); j++) {
-                        sprintf(buf, " %08x", intptr[i / 4 + j]);
-                        strcat(line, buf);
-                }
-                buf[0] = ' ';
-                buf[2] = 0;
-                for (j = 0; (j < 16) && (i + j < length); j++) {
-                        buf[1] =
-                            isprint(charptr[i + j]) ? charptr[i + j] : '.';
-                        strcat(line, buf);
-                }
-                printk("%s\n", line);
-        }
-}
-#endif
 #ifdef PROC_FS_JFS /* see jfs_debug.h */
 static struct proc_dir_entry *base;
diff --git a/fs/jfs/jfs_debug.h b/fs/jfs/jfs_debug.h
index 7378798f0b21..044c1e654cc0 100644
--- a/fs/jfs/jfs_debug.h
+++ b/fs/jfs/jfs_debug.h
@@ -62,7 +62,6 @@ extern void jfs_proc_clean(void);
 extern int jfsloglevel;
-extern void dump_mem(char *label, void *data, int length);
 extern int jfs_txanchor_read(char *, char **, off_t, int, int *, void *);
 /* information message: e.g., configuration, major event */
@@ -94,7 +93,6 @@ extern int jfs_txanchor_read(char *, char **, off_t, int, int *, void *);
 *      ---------
 */
 #else                           /* CONFIG_JFS_DEBUG */
-#define dump_mem(label,data,length) do {} while (0)
 #define ASSERT(p) do {} while (0)
 #define jfs_info(fmt, arg...) do {} while (0)
 #define jfs_debug(fmt, arg...) do {} while (0)
diff --git a/fs/jfs/jfs_dinode.h b/fs/jfs/jfs_dinode.h
index 40b20111383c..c387540d3425 100644
--- a/fs/jfs/jfs_dinode.h
+++ b/fs/jfs/jfs_dinode.h
@@ -19,23 +19,23 @@
 #define _H_JFS_DINODE
 /*
- *      jfs_dinode.h: on-disk inode manager
+ *      jfs_dinode.h: on-disk inode manager
 */
-#define INODESLOTSIZE           128
+#define INODESLOTSIZE           128
-#define L2INODESLOTSIZE         7
+#define L2INODESLOTSIZE         7
-#define log2INODESIZE           9       /* log2(bytes per dinode) */
+#define log2INODESIZE           9       /* log2(bytes per dinode) */
 /*
- *      on-disk inode : 512 bytes
+ *      on-disk inode : 512 bytes
 *
 * note: align 64-bit fields on 8-byte boundary.
 */
 struct dinode {
        /*
-         *      I. base area (128 bytes)
+         *      I. base area (128 bytes)
-         *      ------------------------
+         *      ------------------------
         *
         * define generic/POSIX attributes
         */
@@ -70,16 +70,16 @@ struct dinode {
        __le32 di_acltype;      /* 4: Type of ACL */
        /*
-         *      Extension Areas.
+         *      Extension Areas.
         *
-         *      Historically, the inode was partitioned into 4 128-byte areas,
+         *      Historically, the inode was partitioned into 4 128-byte areas,
-         *      the last 3 being defined as unions which could have multiple
+         *      the last 3 being defined as unions which could have multiple
-         *      uses.  The first 96 bytes had been completely unused until
+         *      uses.  The first 96 bytes had been completely unused until
-         *      an index table was added to the directory.  It is now more
+         *      an index table was added to the directory.  It is now more
-         *      useful to describe the last 3/4 of the inode as a single
+         *      useful to describe the last 3/4 of the inode as a single
-         *      union.  We would probably be better off redesigning the
+         *      union.  We would probably be better off redesigning the
-         *      entire structure from scratch, but we don't want to break
+         *      entire structure from scratch, but we don't want to break
-         *      commonality with OS/2's JFS at this time.
+         *      commonality with OS/2's JFS at this time.
         */
        union {
                struct {
@@ -95,7 +95,7 @@ struct dinode {
                } _dir;                                 /* (384) */
 #define di_dirtable     u._dir._table
 #define di_dtroot       u._dir._dtroot
-#define di_parent       di_dtroot.header.idotdot
+#define di_parent       di_dtroot.header.idotdot
 #define di_DASD         di_dtroot.header.DASD
                struct {
@@ -127,14 +127,14 @@ struct dinode {
 #define di_inlinedata   u._file._u2._special._u
 #define di_rdev         u._file._u2._special._u._rdev
 #define di_fastsymlink  u._file._u2._special._u._fastsymlink
-#define di_inlineea     u._file._u2._special._inlineea
+#define di_inlineea     u._file._u2._special._inlineea
        } u;
 };
 /* extended mode bits (on-disk inode di_mode) */
-#define IFJOURNAL       0x00010000      /* journalled file */
+#define IFJOURNAL       0x00010000      /* journalled file */
-#define ISPARSE         0x00020000      /* sparse file enabled */
+#define ISPARSE         0x00020000      /* sparse file enabled */
-#define INLINEEA        0x00040000      /* inline EA area free */
+#define INLINEEA        0x00040000      /* inline EA area free */
 #define ISWAPFILE       0x00800000      /* file open for pager swap space */
 /* more extended mode bits: attributes for OS/2 */
diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
index f3b1ebb22280..e1985066b1c6 100644
--- a/fs/jfs/jfs_dmap.c
+++ b/fs/jfs/jfs_dmap.c
@@ -154,12 +154,12 @@ static const s8 budtab[256] = {
 *              the in-core descriptor is initialized from disk.
 *
 * PARAMETERS:
- *      ipbmap  -  pointer to in-core inode for the block map.
+ *      ipbmap  - pointer to in-core inode for the block map.
 *
 * RETURN VALUES:
- *      0       - success
+ *      0       - success
- *      -ENOMEM - insufficient memory
+ *      -ENOMEM - insufficient memory
- *      -EIO    - i/o error
+ *      -EIO    - i/o error
 */
 int dbMount(struct inode *ipbmap)
 {
@@ -232,11 +232,11 @@ int dbMount(struct inode *ipbmap)
 *              the memory for this descriptor is freed.
 *
 * PARAMETERS:
- *      ipbmap  -  pointer to in-core inode for the block map.
+ *      ipbmap  - pointer to in-core inode for the block map.
 *
 * RETURN VALUES:
- *      0       - success
+ *      0       - success
- *      -EIO    - i/o error
+ *      -EIO    - i/o error
 */
 int dbUnmount(struct inode *ipbmap, int mounterror)
 {
@@ -320,13 +320,13 @@ int dbSync(struct inode *ipbmap)
 *              at a time.
 *
 * PARAMETERS:
- *      ip      -  pointer to in-core inode;
+ *      ip      - pointer to in-core inode;
- *      blkno   -  starting block number to be freed.
+ *      blkno   - starting block number to be freed.
- *      nblocks -  number of blocks to be freed.
+ *      nblocks - number of blocks to be freed.
 *
 * RETURN VALUES:
- *      0       - success
+ *      0       - success
- *      -EIO    - i/o error
+ *      -EIO    - i/o error
 */
 int dbFree(struct inode *ip, s64 blkno, s64 nblocks)
 {
@@ -395,23 +395,23 @@ int dbFree(struct inode *ip, s64 blkno, s64 nblocks)
 /*
 * NAME:        dbUpdatePMap()
 *
- * FUNCTION:    update the allocation state (free or allocate) of the
+ * FUNCTION:    update the allocation state (free or allocate) of the
 *              specified block range in the persistent block allocation map.
 *
 *              the blocks will be updated in the persistent map one
 *              dmap at a time.
 *
 * PARAMETERS:
- *      ipbmap  -  pointer to in-core inode for the block map.
+ *      ipbmap  - pointer to in-core inode for the block map.
- *      free    -  'true' if block range is to be freed from the persistent
+ *      free    - 'true' if block range is to be freed from the persistent
- *                 map; 'false' if it is to   be allocated.
+ *                map; 'false' if it is to be allocated.
- *      blkno   -  starting block number of the range.
+ *      blkno   - starting block number of the range.
- *      nblocks -  number of contiguous blocks in the range.
+ *      nblocks - number of contiguous blocks in the range.
- *      tblk    -  transaction block;
+ *      tblk    - transaction block;
 *
 * RETURN VALUES:
- *      0       - success
+ *      0       - success
- *      -EIO    - i/o error
+ *      -EIO    - i/o error
 */
 int
 dbUpdatePMap(struct inode *ipbmap,
@@ -573,7 +573,7 @@ dbUpdatePMap(struct inode *ipbmap,
 /*
 * NAME:        dbNextAG()
 *
- * FUNCTION:    find the preferred allocation group for new allocations.
+ * FUNCTION:    find the preferred allocation group for new allocations.
 *
 *              Within the allocation groups, we maintain a preferred
 *              allocation group which consists of a group with at least
@@ -589,10 +589,10 @@ dbUpdatePMap(struct inode *ipbmap,
 *              empty ags around for large allocations.
 *
 * PARAMETERS:
- *      ipbmap  -  pointer to in-core inode for the block map.
+ *      ipbmap  - pointer to in-core inode for the block map.
 *
 * RETURN VALUES:
- *      the preferred allocation group number.
+ *      the preferred allocation group number.
 */
 int dbNextAG(struct inode *ipbmap)
 {
@@ -656,7 +656,7 @@ unlock:
 /*
 * NAME:        dbAlloc()
 *
- * FUNCTION:    attempt to allocate a specified number of contiguous free
+ * FUNCTION:    attempt to allocate a specified number of contiguous free
 *              blocks from the working allocation block map.
 *
 *              the block allocation policy uses hints and a multi-step
@@ -680,16 +680,16 @@ unlock:
 *              size or requests that specify no hint value.
 *
 * PARAMETERS:
- *      ip      -  pointer to in-core inode;
+ *      ip      - pointer to in-core inode;
- *      hint    - allocation hint.
+ *      hint    - allocation hint.
- *      nblocks - number of contiguous blocks in the range.
+ *      nblocks - number of contiguous blocks in the range.
- *      results - on successful return, set to the starting block number
+ *      results - on successful return, set to the starting block number
 *                of the newly allocated contiguous range.
 *
 * RETURN VALUES:
- *      0       - success
+ *      0       - success
- *      -ENOSPC - insufficient disk resources
+ *      -ENOSPC - insufficient disk resources
- *      -EIO    - i/o error
+ *      -EIO    - i/o error
 */
 int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
 {
@@ -706,12 +706,6 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
        /* assert that nblocks is valid */
        assert(nblocks > 0);
-#ifdef _STILL_TO_PORT
-        /* DASD limit check                                     F226941 */
-        if (OVER_LIMIT(ip, nblocks))
-                return -ENOSPC;
-#endif                          /* _STILL_TO_PORT */
        /* get the log2 number of blocks to be allocated.
         * if the number of blocks is not a log2 multiple,
         * it will be rounded up to the next log2 multiple.
@@ -720,7 +714,6 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
        bmp = JFS_SBI(ip->i_sb)->bmap;
-//retry:        /* serialize w.r.t.extendfs() */
        mapSize = bmp->db_mapsize;
        /* the hint should be within the map */
@@ -879,17 +872,17 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
 /*
 * NAME:        dbAllocExact()
 *
- * FUNCTION:    try to allocate the requested extent;
+ * FUNCTION:    try to allocate the requested extent;
 *
 * PARAMETERS:
- *      ip      - pointer to in-core inode;
+ *      ip      - pointer to in-core inode;
- *      blkno   - extent address;
+ *      blkno   - extent address;
- *      nblocks - extent length;
+ *      nblocks - extent length;
 *
 * RETURN VALUES:
- *      0       - success
+ *      0       - success
- *      -ENOSPC - insufficient disk resources
+ *      -ENOSPC - insufficient disk resources
- *      -EIO    - i/o error
+ *      -EIO    - i/o error
 */
 int dbAllocExact(struct inode *ip, s64 blkno, int nblocks)
 {
@@ -946,7 +939,7 @@ int dbAllocExact(struct inode *ip, s64 blkno, int nblocks)
 /*
 * NAME:        dbReAlloc()
 *
- * FUNCTION:    attempt to extend a current allocation by a specified
+ * FUNCTION:    attempt to extend a current allocation by a specified
 *              number of blocks.
 *
 *              this routine attempts to satisfy the allocation request
@@ -959,21 +952,21 @@ int dbAllocExact(struct inode *ip, s64 blkno, int nblocks)
 *              number of blocks required.
 *
 * PARAMETERS:
- *      ip          -  pointer to in-core inode requiring allocation.
+ *      ip          -  pointer to in-core inode requiring allocation.
- *      blkno       -  starting block of the current allocation.
+ *      blkno       -  starting block of the current allocation.
- *      nblocks     -  number of contiguous blocks within the current
+ *      nblocks     -  number of contiguous blocks within the current
 *                     allocation.
- *      addnblocks  -  number of blocks to add to the allocation.
+ *      addnblocks  -  number of blocks to add to the allocation.
- *      results -      on successful return, set to the starting block number
+ *      results -      on successful return, set to the starting block number
 *                     of the existing allocation if the existing allocation
 *                     was extended in place or to a newly allocated contiguous
 *                     range if the existing allocation could not be extended
 *                     in place.
 *
 * RETURN VALUES:
- *      0       - success
+ *      0       - success
- *      -ENOSPC - insufficient disk resources
+ *      -ENOSPC - insufficient disk resources
- *      -EIO    - i/o error
+ *      -EIO    - i/o error
 */
 int
 dbReAlloc(struct inode *ip,
@@ -1004,7 +997,7 @@ dbReAlloc(struct inode *ip,
 /*
 * NAME:        dbExtend()
 *
- * FUNCTION:    attempt to extend a current allocation by a specified
+ * FUNCTION:    attempt to extend a current allocation by a specified
 *              number of blocks.
 *
 *              this routine attempts to satisfy the allocation request
@@ -1013,16 +1006,16 @@ dbReAlloc(struct inode *ip,
 *              immediately following the current allocation.
 *
 * PARAMETERS:
- *      ip          -  pointer to in-core inode requiring allocation.
+ *      ip          -  pointer to in-core inode requiring allocation.
- *      blkno       -  starting block of the current allocation.
+ *      blkno       -  starting block of the current allocation.
- *      nblocks     -  number of contiguous blocks within the current
+ *      nblocks     -  number of contiguous blocks within the current
 *                     allocation.
- *      addnblocks  -  number of blocks to add to the allocation.
+ *      addnblocks  -  number of blocks to add to the allocation.
 *
 * RETURN VALUES:
- *      0       - success
+ *      0       - success
- *      -ENOSPC - insufficient disk resources
+ *      -ENOSPC - insufficient disk resources
- *      -EIO    - i/o error
+ *      -EIO    - i/o error
 */
 static int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks)
 {
@@ -1109,19 +1102,19 @@ static int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks)
 /*
 * NAME:        dbAllocNext()
 *
- * FUNCTION:    attempt to allocate the blocks of the specified block
+ * FUNCTION:    attempt to allocate the blocks of the specified block
 *              range within a dmap.
 *
 * PARAMETERS:
- *      bmp     -  pointer to bmap descriptor
+ *      bmp     -  pointer to bmap descriptor
- *      dp      -  pointer to dmap.
+ *      dp      -  pointer to dmap.
- *      blkno   -  starting block number of the range.
+ *      blkno   -  starting block number of the range.
- *      nblocks -  number of contiguous free blocks of the range.
+ *      nblocks -  number of contiguous free blocks of the range.
 *
 * RETURN VALUES:
- *      0       - success
+ *      0       - success
- *      -ENOSPC - insufficient disk resources
+ *      -ENOSPC - insufficient disk resources
- *      -EIO    - i/o error
+ *      -EIO    - i/o error
 *
 * serialization: IREAD_LOCK(ipbmap) held on entry/exit;
 */
@@ -1233,7 +1226,7 @@ static int dbAllocNext(struct bmap * bmp, struct dmap * dp, s64 blkno,
 /*
 * NAME:        dbAllocNear()
 *
- * FUNCTION:    attempt to allocate a number of contiguous free blocks near
+ * FUNCTION:    attempt to allocate a number of contiguous free blocks near
 *              a specified block (hint) within a dmap.
 *
 *              starting with the dmap leaf that covers the hint, we'll
@@ -1242,18 +1235,18 @@ static int dbAllocNext(struct bmap * bmp, struct dmap * dp, s64 blkno,
 *              the desired free space.
 *
 * PARAMETERS:
- *      bmp     -  pointer to bmap descriptor
+ *      bmp     -  pointer to bmap descriptor
- *      dp      -  pointer to dmap.
+ *      dp      -  pointer to dmap.
- *      blkno   -  block number to allocate near.
+ *      blkno   -  block number to allocate near.
- *      nblocks -  actual number of contiguous free blocks desired.
+ *      nblocks -  actual number of contiguous free blocks desired.
- *      l2nb    -  log2 number of contiguous free blocks desired.
+ *      l2nb    -  log2 number of contiguous free blocks desired.
- *      results -  on successful return, set to the starting block number
+ *      results -  on successful return, set to the starting block number
 *                 of the newly allocated range.
 *
 * RETURN VALUES:
- *      0       - success
+ *      0       - success
- *      -ENOSPC - insufficient disk resources
+ *      -ENOSPC - insufficient disk resources
- *      -EIO    - i/o error
+ *      -EIO    - i/o error
 *
 * serialization: IREAD_LOCK(ipbmap) held on entry/exit;
 */
@@ -1316,7 +1309,7 @@ dbAllocNear(struct bmap * bmp,
 /*
 * NAME:        dbAllocAG()
 *
- * FUNCTION:    attempt to allocate the specified number of contiguous
+ * FUNCTION:    attempt to allocate the specified number of contiguous
 *              free blocks within the specified allocation group.
 *
 *              unless the allocation group size is equal to the number
@@ -1353,17 +1346,17 @@ dbAllocNear(struct bmap * bmp,
 *              the allocation group.
 *
 * PARAMETERS:
- *      bmp     -  pointer to bmap descriptor
+ *      bmp     -  pointer to bmap descriptor
 *      agno    - allocation group number.
- *      nblocks -  actual number of contiguous free blocks desired.
+ *      nblocks -  actual number of contiguous free blocks desired.
- *      l2nb    -  log2 number of contiguous free blocks desired.
+ *      l2nb    -  log2 number of contiguous free blocks desired.
- *      results -  on successful return, set to the starting block number
+ *      results -  on successful return, set to the starting block number
 *                 of the newly allocated range.
 *
 * RETURN VALUES:
- *      0       - success
+ *      0       - success
- *      -ENOSPC - insufficient disk resources
+ *      -ENOSPC - insufficient disk resources
- *      -EIO    - i/o error
+ *      -EIO    - i/o error
 *
 * note: IWRITE_LOCK(ipmap) held on entry/exit;
 */
@@ -1546,7 +1539,7 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results)
 /*
 * NAME:        dbAllocAny()
 *
- * FUNCTION:    attempt to allocate the specified number of contiguous
+ * FUNCTION:    attempt to allocate the specified number of contiguous
 *              free blocks anywhere in the file system.
 *
 *              dbAllocAny() attempts to find the sufficient free space by
@@ -1556,16 +1549,16 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results)
 *              desired free space is allocated.
 *
 * PARAMETERS:
- *      bmp     -  pointer to bmap descriptor
+ *      bmp     -  pointer to bmap descriptor
- *      nblocks  -  actual number of contiguous free blocks desired.
+ *      nblocks  -  actual number of contiguous free blocks desired.
- *      l2nb     -  log2 number of contiguous free blocks desired.
+ *      l2nb     -  log2 number of contiguous free blocks desired.
- *      results -  on successful return, set to the starting block number
+ *      results -  on successful return, set to the starting block number
 *                 of the newly allocated range.
 *
 * RETURN VALUES:
- *      0       - success
+ *      0       - success
- *      -ENOSPC - insufficient disk resources
+ *      -ENOSPC - insufficient disk resources
- *      -EIO    - i/o error
+ *      -EIO    - i/o error
 *
 * serialization: IWRITE_LOCK(ipbmap) held on entry/exit;
 */
@@ -1598,9 +1591,9 @@ static int dbAllocAny(struct bmap * bmp, s64 nblocks, int l2nb, s64 * results)
 /*
 * NAME:        dbFindCtl()
 *
- * FUNCTION:    starting at a specified dmap control page level and block
+ * FUNCTION:    starting at a specified dmap control page level and block
 *              number, search down the dmap control levels for a range of
- *              contiguous free blocks large enough to satisfy an allocation
+ *              contiguous free blocks large enough to satisfy an allocation
 *              request for the specified number of free blocks.
 *
 *              if sufficient contiguous free blocks are found, this routine
@@ -1609,17 +1602,17 @@ static int dbAllocAny(struct bmap * bmp, s64 nblocks, int l2nb, s64 * results)
 *              is sufficient in size.
 *
 * PARAMETERS:
- *      bmp     -  pointer to bmap descriptor
+ *      bmp     -  pointer to bmap descriptor
- *      level   -  starting dmap control page level.
+ *      level   -  starting dmap control page level.
- *      l2nb    -  log2 number of contiguous free blocks desired.
+ *      l2nb    -  log2 number of contiguous free blocks desired.
- *      *blkno  -  on entry, starting block number for conducting the search.
+ *      *blkno  -  on entry, starting block number for conducting the search.
 *                 on successful return, the first block within a dmap page
 *                 that contains or starts a range of contiguous free blocks.
 *
 * RETURN VALUES:
- *      0       - success
+ *      0       - success
- *      -ENOSPC - insufficient disk resources
+ *      -ENOSPC - insufficient disk resources
- *      -EIO    - i/o error
+ *      -EIO    - i/o error
 *
 * serialization: IWRITE_LOCK(ipbmap) held on entry/exit;
 */
@@ -1699,7 +1692,7 @@ static int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno)
 /*
 * NAME:        dbAllocCtl()
 *
- * FUNCTION:    attempt to allocate a specified number of contiguous
+ * FUNCTION:    attempt to allocate a specified number of contiguous
 *              blocks starting within a specific dmap.
 *
 *              this routine is called by higher level routines that search
@@ -1726,18 +1719,18 @@ static int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno)
 *              first dmap (i.e. blkno).
 *
 * PARAMETERS:
- *      bmp     -  pointer to bmap descriptor
+ *      bmp     -  pointer to bmap descriptor
- *      nblocks  -  actual number of contiguous free blocks to allocate.
+ *      nblocks  -  actual number of contiguous free blocks to allocate.
- *      l2nb     -  log2 number of contiguous free blocks to allocate.
+ *      l2nb     -  log2 number of contiguous free blocks to allocate.
- *      blkno    -  starting block number of the dmap to start the allocation
+ *      blkno    -  starting block number of the dmap to start the allocation
 *                  from.
- *      results -  on successful return, set to the starting block number
+ *      results -  on successful return, set to the starting block number
 *                 of the newly allocated range.
 *
 * RETURN VALUES:
- *      0       - success
+ *      0       - success
- *      -ENOSPC - insufficient disk resources
+ *      -ENOSPC - insufficient disk resources
- *      -EIO    - i/o error
+ *      -EIO    - i/o error
 *
 * serialization: IWRITE_LOCK(ipbmap) held on entry/exit;
 */
@@ -1870,7 +1863,7 @@ dbAllocCtl(struct bmap * bmp, s64 nblocks, int l2nb, s64 blkno, s64 * results)
 /*
 * NAME:        dbAllocDmapLev()
 *
- * FUNCTION:    attempt to allocate a specified number of contiguous blocks
+ * FUNCTION:    attempt to allocate a specified number of contiguous blocks
 *              from a specified dmap.
 *
 *              this routine checks if the contiguous blocks are available.
@@ -1878,17 +1871,17 @@ dbAllocCtl(struct bmap * bmp, s64 nblocks, int l2nb, s64 blkno, s64 * results)
 *              returned.
 *
 * PARAMETERS:
- *      mp      -  pointer to bmap descriptor
+ *      mp      -  pointer to bmap descriptor
- *      dp      -  pointer to dmap to attempt to allocate blocks from.
+ *      dp      -  pointer to dmap to attempt to allocate blocks from.
- *      l2nb    -  log2 number of contiguous block desired.
+ *      l2nb    -  log2 number of contiguous block desired.
- *      nblocks -  actual number of contiguous block desired.
+ *      nblocks -  actual number of contiguous block desired.
- *      results -  on successful return, set to the starting block number
+ *      results -  on successful return, set to the starting block number
 *                 of the newly allocated range.
 *
 * RETURN VALUES:
- *      0       - success
+ *      0       - success
- *      -ENOSPC - insufficient disk resources
+ *      -ENOSPC - insufficient disk resources
- *      -EIO    - i/o error
+ *      -EIO    - i/o error
 *
 * serialization: IREAD_LOCK(ipbmap), e.g., from dbAlloc(), or
 *      IWRITE_LOCK(ipbmap), e.g., dbAllocCtl(), held on entry/exit;
@@ -1933,7 +1926,7 @@ dbAllocDmapLev(struct bmap * bmp,
 /*
 * NAME:        dbAllocDmap()
 *
- * FUNCTION:    adjust the disk allocation map to reflect the allocation
+ * FUNCTION:    adjust the disk allocation map to reflect the allocation
 *              of a specified block range within a dmap.
 *
 *              this routine allocates the specified blocks from the dmap
@@ -1946,14 +1939,14 @@ dbAllocDmapLev(struct bmap * bmp,
 *              covers this dmap.
 *
 * PARAMETERS:
- *      bmp     -  pointer to bmap descriptor
+ *      bmp     -  pointer to bmap descriptor
- *      dp      -  pointer to dmap to allocate the block range from.
+ *      dp      -  pointer to dmap to allocate the block range from.
- *      blkno   -  starting block number of the block to be allocated.
+ *      blkno   -  starting block number of the block to be allocated.
- *      nblocks -  number of blocks to be allocated.
+ *      nblocks -  number of blocks to be allocated.
 *
 * RETURN VALUES:
- *      0       - success
+ *      0       - success
- *      -EIO    - i/o error
+ *      -EIO    - i/o error
 *
 * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit;
 */
@@ -1989,7 +1982,7 @@ static int dbAllocDmap(struct bmap * bmp, struct dmap * dp, s64 blkno,
 /*
 * NAME:        dbFreeDmap()
 *
- * FUNCTION:    adjust the disk allocation map to reflect the allocation
+ * FUNCTION:    adjust the disk allocation map to reflect the allocation
 *              of a specified block range within a dmap.
 *
 *              this routine frees the specified blocks from the dmap through
@@ -1997,18 +1990,18 @@ static int dbAllocDmap(struct bmap * bmp, struct dmap * dp, s64 blkno,
 *              causes the maximum string of free blocks within the dmap to
 *              change (i.e. the value of the root of the dmap's dmtree), this
 *              routine will cause this change to be reflected up through the
- *              appropriate levels of the dmap control pages by a call to
+ *              appropriate levels of the dmap control pages by a call to
 *              dbAdjCtl() for the L0 dmap control page that covers this dmap.
 *
 * PARAMETERS:
- *      bmp     -  pointer to bmap descriptor
+ *      bmp     -  pointer to bmap descriptor
- *      dp      -  pointer to dmap to free the block range from.
+ *      dp      -  pointer to dmap to free the block range from.
- *      blkno   -  starting block number of the block to be freed.
+ *      blkno   -  starting block number of the block to be freed.
- *      nblocks -  number of blocks to be freed.
+ *      nblocks -  number of blocks to be freed.
 *
 * RETURN VALUES:
- *      0       - success
+ *      0       - success
- *      -EIO    - i/o error
+ *      -EIO    - i/o error
 *
 * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit;
 */
@@ -2055,7 +2048,7 @@ static int dbFreeDmap(struct bmap * bmp, struct dmap * dp, s64 blkno,
 /*
 * NAME:        dbAllocBits()
 *
- * FUNCTION:    allocate a specified block range from a dmap.
+ * FUNCTION:    allocate a specified block range from a dmap.
 *
 *              this routine updates the dmap to reflect the working
 *              state allocation of the specified block range. it directly
@@ -2065,10 +2058,10 @@ static int dbFreeDmap(struct bmap * bmp, struct dmap * dp, s64 blkno,
 *              dmap's dmtree, as a whole, to reflect the allocated range.
 *
 * PARAMETERS:
- *      bmp     -  pointer to bmap descriptor
+ *      bmp     -  pointer to bmap descriptor
- *      dp      -  pointer to dmap to allocate bits from.
+ *      dp      -  pointer to dmap to allocate bits from.
- *      blkno   -  starting block number of the bits to be allocated.
+ *      blkno   -  starting block number of the bits to be allocated.
- *      nblocks -  number of bits to be allocated.
+ *      nblocks -  number of bits to be allocated.
 *
 * RETURN VALUES: none
 *
@@ -2149,7 +2142,7 @@ static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
                         * the allocated words.
                         */
                        for (; nwords > 0; nwords -= nw) {
-                                if (leaf[word] < BUDMIN) {
+                                if (leaf[word] < BUDMIN) {
                                        jfs_error(bmp->db_ipbmap->i_sb,
                                                  "dbAllocBits: leaf page "
                                                  "corrupt");
@@ -2202,7 +2195,7 @@ static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
 /*
 * NAME:        dbFreeBits()
 *
- * FUNCTION:    free a specified block range from a dmap.
+ * FUNCTION:    free a specified block range from a dmap.
 *
 *              this routine updates the dmap to reflect the working
 *              state allocation of the specified block range. it directly
@@ -2212,10 +2205,10 @@ static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
 *              dmtree, as a whole, to reflect the deallocated range.
 *
 * PARAMETERS:
- *      bmp     -  pointer to bmap descriptor
+ *      bmp     -  pointer to bmap descriptor
- *      dp      -  pointer to dmap to free bits from.
+ *      dp      -  pointer to dmap to free bits from.
- *      blkno   -  starting block number of the bits to be freed.
+ *      blkno   -  starting block number of the bits to be freed.
- *      nblocks -  number of bits to be freed.
+ *      nblocks -  number of bits to be freed.
 *
 * RETURN VALUES: 0 for success
 *
@@ -2388,19 +2381,19 @@ static int dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
 *              the new root value and the next dmap control page level to
 *              be adjusted.
 * PARAMETERS:
- *      bmp     -  pointer to bmap descriptor
+ *      bmp     -  pointer to bmap descriptor
- *      blkno   -  the first block of a block range within a dmap.  it is
+ *      blkno   -  the first block of a block range within a dmap.  it is
 *                 the allocation or deallocation of this block range that
 *                 requires the dmap control page to be adjusted.
- *      newval  -  the new value of the lower level dmap or dmap control
+ *      newval  -  the new value of the lower level dmap or dmap control
 *                 page root.
- *      alloc   -  'true' if adjustment is due to an allocation.
+ *      alloc   -  'true' if adjustment is due to an allocation.
- *      level   -  current level of dmap control page (i.e. L0, L1, L2) to
+ *      level   -  current level of dmap control page (i.e. L0, L1, L2) to
 *                 be adjusted.
 *
 * RETURN VALUES:
- *      0       - success
+ *      0       - success
- *      -EIO    - i/o error
+ *      -EIO    - i/o error
 *
 * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit;
 */
@@ -2544,16 +2537,16 @@ dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc, int level)
 /*
 * NAME:        dbSplit()
 *
- * FUNCTION:    update the leaf of a dmtree with a new value, splitting
+ * FUNCTION:    update the leaf of a dmtree with a new value, splitting
 *              the leaf from the binary buddy system of the dmtree's
 *              leaves, as required.
 *
 * PARAMETERS:
- *      tp      - pointer to the tree containing the leaf.
+ *      tp      - pointer to the tree containing the leaf.
- *      leafno  - the number of the leaf to be updated.
+ *      leafno  - the number of the leaf to be updated.
- *      splitsz - the size the binary buddy system starting at the leaf
+ *      splitsz - the size the binary buddy system starting at the leaf
 *                must be split to, specified as the log2 number of blocks.
- *      newval  - the new value for the leaf.
+ *      newval  - the new value for the leaf.
 *
 * RETURN VALUES: none
 *
@@ -2600,7 +2593,7 @@ static void dbSplit(dmtree_t * tp, int leafno, int splitsz, int newval)
 /*
 * NAME:        dbBackSplit()
 *
- * FUNCTION:    back split the binary buddy system of dmtree leaves
+ * FUNCTION:    back split the binary buddy system of dmtree leaves
 *              that hold a specified leaf until the specified leaf
 *              starts its own binary buddy system.
 *
@@ -2617,8 +2610,8 @@ static void dbSplit(dmtree_t * tp, int leafno, int splitsz, int newval)
 *              in which a previous join operation must be backed out.
 *
 * PARAMETERS:
- *      tp      - pointer to the tree containing the leaf.
+ *      tp      - pointer to the tree containing the leaf.
- *      leafno  - the number of the leaf to be updated.
+ *      leafno  - the number of the leaf to be updated.
 *
 * RETURN VALUES: none
 *
@@ -2692,14 +2685,14 @@ static int dbBackSplit(dmtree_t * tp, int leafno)
 /*
 * NAME:        dbJoin()
 *
- * FUNCTION:    update the leaf of a dmtree with a new value, joining
+ * FUNCTION:    update the leaf of a dmtree with a new value, joining
 *              the leaf with other leaves of the dmtree into a multi-leaf
 *              binary buddy system, as required.
 *
 * PARAMETERS:
- *      tp      - pointer to the tree containing the leaf.
+ *      tp      - pointer to the tree containing the leaf.
- *      leafno  - the number of the leaf to be updated.
+ *      leafno  - the number of the leaf to be updated.
- *      newval  - the new value for the leaf.
+ *      newval  - the new value for the leaf.
 *
 * RETURN VALUES: none
 */
@@ -2785,15 +2778,15 @@ static int dbJoin(dmtree_t * tp, int leafno, int newval)
 /*
 * NAME:        dbAdjTree()
 *
- * FUNCTION:    update a leaf of a dmtree with a new value, adjusting
+ * FUNCTION:    update a leaf of a dmtree with a new value, adjusting
 *              the dmtree, as required, to reflect the new leaf value.
 *              the combination of any buddies must already be done before
 *              this is called.
 *
 * PARAMETERS:
- *      tp      - pointer to the tree to be adjusted.
+ *      tp      - pointer to the tree to be adjusted.
- *      leafno  - the number of the leaf to be updated.
+ *      leafno  - the number of the leaf to be updated.
- *      newval  - the new value for the leaf.
+ *      newval  - the new value for the leaf.
 *
 * RETURN VALUES: none
 */
@@ -2852,7 +2845,7 @@ static void dbAdjTree(dmtree_t * tp, int leafno, int newval)
 /*
 * NAME:        dbFindLeaf()
 *
- * FUNCTION:    search a dmtree_t for sufficient free blocks, returning
+ * FUNCTION:    search a dmtree_t for sufficient free blocks, returning
 *              the index of a leaf describing the free blocks if
 *              sufficient free blocks are found.
 *
@@ -2861,15 +2854,15 @@ static void dbAdjTree(dmtree_t * tp, int leafno, int newval)
 *              free space.
 *
 * PARAMETERS:
- *      tp      - pointer to the tree to be searched.
+ *      tp      - pointer to the tree to be searched.
- *      l2nb    - log2 number of free blocks to search for.
+ *      l2nb    - log2 number of free blocks to search for.
 *      leafidx - return pointer to be set to the index of the leaf
 *                describing at least l2nb free blocks if sufficient
 *                free blocks are found.
 *
 * RETURN VALUES:
- *      0       - success
+ *      0       - success
- *      -ENOSPC - insufficient free blocks.
+ *      -ENOSPC - insufficient free blocks.
 */
 static int dbFindLeaf(dmtree_t * tp, int l2nb, int *leafidx)
 {
@@ -2916,18 +2909,18 @@ static int dbFindLeaf(dmtree_t * tp, int l2nb, int *leafidx)
 /*
 * NAME:        dbFindBits()
 *
- * FUNCTION:    find a specified number of binary buddy free bits within a
+ * FUNCTION:    find a specified number of binary buddy free bits within a
 *              dmap bitmap word value.
 *
 *              this routine searches the bitmap value for (1 << l2nb) free
 *              bits at (1 << l2nb) alignments within the value.
 *
 * PARAMETERS:
- *      word    -  dmap bitmap word value.
+ *      word    -  dmap bitmap word value.
- *      l2nb    -  number of free bits specified as a log2 number.
+ *      l2nb    -  number of free bits specified as a log2 number.
 *
 * RETURN VALUES:
- *      starting bit number of free bits.
+ *      starting bit number of free bits.
 */
 static int dbFindBits(u32 word, int l2nb)
 {
@@ -2963,14 +2956,14 @@ static int dbFindBits(u32 word, int l2nb)
 /*
 * NAME:        dbMaxBud(u8 *cp)
 *
- * FUNCTION:    determine the largest binary buddy string of free
+ * FUNCTION:    determine the largest binary buddy string of free
 *              bits within 32-bits of the map.
 *
 * PARAMETERS:
- *      cp      -  pointer to the 32-bit value.
+ *      cp      -  pointer to the 32-bit value.
 *
 * RETURN VALUES:
- *      largest binary buddy of free bits within a dmap word.
+ *      largest binary buddy of free bits within a dmap word.
 */
 static int dbMaxBud(u8 * cp)
 {
@@ -3000,14 +2993,14 @@ static int dbMaxBud(u8 * cp)
 /*
 * NAME:        cnttz(uint word)
 *
- * FUNCTION:    determine the number of trailing zeros within a 32-bit
+ * FUNCTION:    determine the number of trailing zeros within a 32-bit
 *              value.
 *
 * PARAMETERS:
- *      value   -  32-bit value to be examined.
+ *      value   -  32-bit value to be examined.
 *
 * RETURN VALUES:
- *      count of trailing zeros
+ *      count of trailing zeros
 */
 static int cnttz(u32 word)
 {
@@ -3025,14 +3018,14 @@ static int cnttz(u32 word)
 /*
 * NAME:        cntlz(u32 value)
 *
- * FUNCTION:    determine the number of leading zeros within a 32-bit
+ * FUNCTION:    determine the number of leading zeros within a 32-bit
 *              value.
 *
 * PARAMETERS:
- *      value   -  32-bit value to be examined.
+ *      value   -  32-bit value to be examined.
 *
 * RETURN VALUES:
- *      count of leading zeros
+ *      count of leading zeros
 */
 static int cntlz(u32 value)
 {
@@ -3050,14 +3043,14 @@ static int cntlz(u32 value)
 * NAME:        blkstol2(s64 nb)
 *
 * FUNCTION:    convert a block count to its log2 value. if the block
- *              count is not a l2 multiple, it is rounded up to the next
+ *              count is not a l2 multiple, it is rounded up to the next
 *              larger l2 multiple.
 *
 * PARAMETERS:
- *      nb      -  number of blocks
+ *      nb      -  number of blocks
 *
 * RETURN VALUES:
- *      log2 number of blocks
+ *      log2 number of blocks
 */
 static int blkstol2(s64 nb)
 {
@@ -3099,13 +3092,13 @@ static int blkstol2(s64 nb)
 *              at a time.
 *
 * PARAMETERS:
- *      ip      -  pointer to in-core inode;
+ *      ip      -  pointer to in-core inode;
- *      blkno   -  starting block number to be freed.
+ *      blkno   -  starting block number to be freed.
- *      nblocks -  number of blocks to be freed.
+ *      nblocks -  number of blocks to be freed.
 *
 * RETURN VALUES:
- *      0       - success
+ *      0       - success
- *      -EIO    - i/o error
+ *      -EIO    - i/o error
 */
 int dbAllocBottomUp(struct inode *ip, s64 blkno, s64 nblocks)
 {
@@ -3278,10 +3271,10 @@ static int dbAllocDmapBU(struct bmap * bmp, struct dmap * dp, s64 blkno,
 * L2
 *  |
 *   L1---------------------------------L1
- *    |                                  |
+ *    |                                  |
- *     L0---------L0---------L0           L0---------L0---------L0
+ *     L0---------L0---------L0           L0---------L0---------L0
- *      |          |          |            |          |          |
+ *      |          |          |            |          |          |
- *       d0,...,dn  d0,...,dn  d0,...,dn    d0,...,dn  d0,...,dn  d0,.,dm;
+ *       d0,...,dn  d0,...,dn  d0,...,dn    d0,...,dn  d0,...,dn  d0,.,dm;
 * L2L1L0d0,...,dnL0d0,...,dnL0d0,...,dnL1L0d0,...,dnL0d0,...,dnL0d0,..dm
 *
 * <---old---><----------------------------extend----------------------->
@@ -3307,7 +3300,7 @@ int dbExtendFS(struct inode *ipbmap, s64 blkno,	s64 nblocks)
                 (long long) blkno, (long long) nblocks, (long long) newsize);
        /*
-         *      initialize bmap control page.
+         *      initialize bmap control page.
         *
         * all the data in bmap control page should exclude
         * the mkfs hidden dmap page.
@@ -3330,7 +3323,7 @@ int dbExtendFS(struct inode *ipbmap, s64 blkno,	s64 nblocks)
        bmp->db_numag += ((u32) newsize % (u32) bmp->db_agsize) ? 1 : 0;
        /*
-         *      reconfigure db_agfree[]
+         *      reconfigure db_agfree[]
         * from old AG configuration to new AG configuration;
         *
         * coalesce contiguous k (newAGSize/oldAGSize) AGs;
@@ -3362,7 +3355,7 @@ int dbExtendFS(struct inode *ipbmap, s64 blkno,	s64 nblocks)
        bmp->db_maxag = bmp->db_maxag / k;
        /*
-         *      extend bmap
+         *      extend bmap
         *
         * update bit maps and corresponding level control pages;
         * global control page db_nfree, db_agfree[agno], db_maxfreebud;
@@ -3410,7 +3403,7 @@ int dbExtendFS(struct inode *ipbmap, s64 blkno,	s64 nblocks)
                        /* compute start L0 */
                        j = 0;
                        l1leaf = l1dcp->stree + CTLLEAFIND;
-                        p += nbperpage; /* 1st L0 of L1.k  */
+                        p += nbperpage; /* 1st L0 of L1.k */
                }
                /*
@@ -3548,7 +3541,7 @@ errout:
        return -EIO;
        /*
-         *      finalize bmap control page
+         *      finalize bmap control page
         */
 finalize:
@@ -3567,7 +3560,7 @@ void dbFinalizeBmap(struct inode *ipbmap)
        int i, n;
        /*
-         *      finalize bmap control page
+         *      finalize bmap control page
         */
 //finalize:
        /*
@@ -3953,8 +3946,8 @@ static int dbGetL2AGSize(s64 nblocks)
 * convert number of map pages to the zero origin top dmapctl level
 */
 #define BMAPPGTOLEV(npages)     \
-        (((npages) <= 3 + MAXL0PAGES) ? 0 \
+        (((npages) <= 3 + MAXL0PAGES) ? 0 : \
-       : ((npages) <= 2 + MAXL1PAGES) ? 1 : 2)
+         ((npages) <= 2 + MAXL1PAGES) ? 1 : 2)
 s64 dbMapFileSizeToMapSize(struct inode * ipbmap)
 {
@@ -3981,8 +3974,8 @@ s64 dbMapFileSizeToMapSize(struct inode * ipbmap)
                factor =
                    (i == 2) ? MAXL1PAGES : ((i == 1) ? MAXL0PAGES : 1);
                complete = (u32) npages / factor;
-                ndmaps += complete * ((i == 2) ? LPERCTL * LPERCTL
+                ndmaps += complete * ((i == 2) ? LPERCTL * LPERCTL :
-                                      : ((i == 1) ? LPERCTL : 1));
+                                      ((i == 1) ? LPERCTL : 1));
                /* pages in last/incomplete child */
                npages = (u32) npages % factor;
diff --git a/fs/jfs/jfs_dmap.h b/fs/jfs/jfs_dmap.h
index 45ea454c74bd..11e6d471b364 100644
--- a/fs/jfs/jfs_dmap.h
+++ b/fs/jfs/jfs_dmap.h
@@ -83,7 +83,7 @@ static __inline signed char TREEMAX(signed char *cp)
 *      - 1 is added to account for the control page of the map.
 */
 #define BLKTODMAP(b,s)    \
-        ((((b) >> 13) + ((b) >> 23) + ((b) >> 33) + 3 + 1) << (s))
+        ((((b) >> 13) + ((b) >> 23) + ((b) >> 33) + 3 + 1) << (s))
 /*
 * convert disk block number to the logical block number of the LEVEL 0
@@ -98,7 +98,7 @@ static __inline signed char TREEMAX(signed char *cp)
 *      - 1 is added to account for the control page of the map.
 */
 #define BLKTOL0(b,s)      \
-        (((((b) >> 23) << 10) + ((b) >> 23) + ((b) >> 33) + 2 + 1) << (s))
+        (((((b) >> 23) << 10) + ((b) >> 23) + ((b) >> 33) + 2 + 1) << (s))
 /*
 * convert disk block number to the logical block number of the LEVEL 1
@@ -120,7 +120,7 @@ static __inline signed char TREEMAX(signed char *cp)
 * at the specified level which describes the disk block.
 */
 #define BLKTOCTL(b,s,l)   \
-        (((l) == 2) ? 1 : ((l) == 1) ? BLKTOL1((b),(s)) : BLKTOL0((b),(s)))
+        (((l) == 2) ? 1 : ((l) == 1) ? BLKTOL1((b),(s)) : BLKTOL0((b),(s)))
 /*
 * convert aggregate map size to the zero origin dmapctl level of the
@@ -145,27 +145,27 @@ static __inline signed char TREEMAX(signed char *cp)
 * dmaptree must be consistent with dmapctl.
 */
 struct dmaptree {
-        __le32 nleafs;          /* 4: number of tree leafs      */
+        __le32 nleafs;          /* 4: number of tree leafs      */
-        __le32 l2nleafs;        /* 4: l2 number of tree leafs   */
+        __le32 l2nleafs;        /* 4: l2 number of tree leafs   */
-        __le32 leafidx;         /* 4: index of first tree leaf  */
+        __le32 leafidx;         /* 4: index of first tree leaf  */
-        __le32 height;          /* 4: height of the tree        */
+        __le32 height;          /* 4: height of the tree        */
        s8 budmin;              /* 1: min l2 tree leaf value to combine */
-        s8 stree[TREESIZE];     /* TREESIZE: tree               */
+        s8 stree[TREESIZE];     /* TREESIZE: tree               */
-        u8 pad[2];              /* 2: pad to word boundary      */
+        u8 pad[2];              /* 2: pad to word boundary      */
-};                              /* - 360 -                      */
+};                              /* - 360 -                      */
 /*
 *      dmap page per 8K blocks bitmap
 */
 struct dmap {
-        __le32 nblocks;         /* 4: num blks covered by this dmap     */
+        __le32 nblocks;         /* 4: num blks covered by this dmap     */
-        __le32 nfree;           /* 4: num of free blks in this dmap     */
+        __le32 nfree;           /* 4: num of free blks in this dmap     */
-        __le64 start;           /* 8: starting blkno for this dmap      */
+        __le64 start;           /* 8: starting blkno for this dmap      */
-        struct dmaptree tree;   /* 360: dmap tree                       */
+        struct dmaptree tree;   /* 360: dmap tree                       */
-        u8 pad[1672];           /* 1672: pad to 2048 bytes              */
+        u8 pad[1672];           /* 1672: pad to 2048 bytes              */
-        __le32 wmap[LPERDMAP];  /* 1024: bits of the working map        */
+        __le32 wmap[LPERDMAP];  /* 1024: bits of the working map        */
-        __le32 pmap[LPERDMAP];  /* 1024: bits of the persistent map     */
+        __le32 pmap[LPERDMAP];  /* 1024: bits of the persistent map     */
-};                              /* - 4096 -                             */
+};                              /* - 4096 -                             */
 /*
 *      disk map control page per level.
@@ -173,14 +173,14 @@ struct dmap {
 * dmapctl must be consistent with dmaptree.
 */
 struct dmapctl {
-        __le32 nleafs;          /* 4: number of tree leafs      */
+        __le32 nleafs;          /* 4: number of tree leafs      */
-        __le32 l2nleafs;        /* 4: l2 number of tree leafs   */
+        __le32 l2nleafs;        /* 4: l2 number of tree leafs   */
-        __le32 leafidx;         /* 4: index of the first tree leaf      */
+        __le32 leafidx;         /* 4: index of the first tree leaf      */
-        __le32 height;          /* 4: height of tree            */
+        __le32 height;          /* 4: height of tree            */
-        s8 budmin;              /* 1: minimum l2 tree leaf value        */
+        s8 budmin;              /* 1: minimum l2 tree leaf value        */
-        s8 stree[CTLTREESIZE];  /* CTLTREESIZE: dmapctl tree    */
+        s8 stree[CTLTREESIZE];  /* CTLTREESIZE: dmapctl tree    */
-        u8 pad[2714];           /* 2714: pad to 4096            */
+        u8 pad[2714];           /* 2714: pad to 4096            */
-};                              /* - 4096 -                     */
+};                              /* - 4096 -                     */
 /*
 *      common definition for dmaptree within dmap and dmapctl
@@ -202,41 +202,41 @@ typedef union dmtree {
 *      on-disk aggregate disk allocation map descriptor.
 */
 struct dbmap_disk {
-        __le64 dn_mapsize;      /* 8: number of blocks in aggregate     */
+        __le64 dn_mapsize;      /* 8: number of blocks in aggregate     */
-        __le64 dn_nfree;        /* 8: num free blks in aggregate map    */
+        __le64 dn_nfree;        /* 8: num free blks in aggregate map    */
-        __le32 dn_l2nbperpage;  /* 4: number of blks per page           */
+        __le32 dn_l2nbperpage;  /* 4: number of blks per page           */
-        __le32 dn_numag;        /* 4: total number of ags               */
+        __le32 dn_numag;        /* 4: total number of ags               */
-        __le32 dn_maxlevel;     /* 4: number of active ags              */
+        __le32 dn_maxlevel;     /* 4: number of active ags              */
-        __le32 dn_maxag;        /* 4: max active alloc group number     */
+        __le32 dn_maxag;        /* 4: max active alloc group number     */
-        __le32 dn_agpref;       /* 4: preferred alloc group (hint)      */
+        __le32 dn_agpref;       /* 4: preferred alloc group (hint)      */
-        __le32 dn_aglevel;      /* 4: dmapctl level holding the AG      */
+        __le32 dn_aglevel;      /* 4: dmapctl level holding the AG      */
-        __le32 dn_agheigth;     /* 4: height in dmapctl of the AG       */
+        __le32 dn_agheigth;     /* 4: height in dmapctl of the AG       */
-        __le32 dn_agwidth;      /* 4: width in dmapctl of the AG        */
+        __le32 dn_agwidth;      /* 4: width in dmapctl of the AG        */
-        __le32 dn_agstart;      /* 4: start tree index at AG height     */
+        __le32 dn_agstart;      /* 4: start tree index at AG height     */
-        __le32 dn_agl2size;     /* 4: l2 num of blks per alloc group    */
+        __le32 dn_agl2size;     /* 4: l2 num of blks per alloc group    */
-        __le64 dn_agfree[MAXAG];/* 8*MAXAG: per AG free count           */
+        __le64 dn_agfree[MAXAG];/* 8*MAXAG: per AG free count           */
-        __le64 dn_agsize;       /* 8: num of blks per alloc group       */
+        __le64 dn_agsize;       /* 8: num of blks per alloc group       */
-        s8 dn_maxfreebud;       /* 1: max free buddy system             */
+        s8 dn_maxfreebud;       /* 1: max free buddy system             */
-        u8 pad[3007];           /* 3007: pad to 4096                    */
+        u8 pad[3007];           /* 3007: pad to 4096                    */
-};                              /* - 4096 -                             */
+};                              /* - 4096 -                             */
 struct dbmap {
-        s64 dn_mapsize;         /* number of blocks in aggregate     */
+        s64 dn_mapsize;         /* number of blocks in aggregate        */
-        s64 dn_nfree;           /* num free blks in aggregate map    */
+        s64 dn_nfree;           /* num free blks in aggregate map       */
-        int dn_l2nbperpage;     /* number of blks per page           */
+        int dn_l2nbperpage;     /* number of blks per page              */
-        int dn_numag;           /* total number of ags               */
+        int dn_numag;           /* total number of ags                  */
-        int dn_maxlevel;        /* number of active ags              */
+        int dn_maxlevel;        /* number of active ags                 */
-        int dn_maxag;           /* max active alloc group number     */
+        int dn_maxag;           /* max active alloc group number        */
-        int dn_agpref;          /* preferred alloc group (hint)      */
+        int dn_agpref;          /* preferred alloc group (hint)         */
-        int dn_aglevel;         /* dmapctl level holding the AG      */
+        int dn_aglevel;         /* dmapctl level holding the AG         */
-        int dn_agheigth;        /* height in dmapctl of the AG       */
+        int dn_agheigth;        /* height in dmapctl of the AG          */
-        int dn_agwidth;         /* width in dmapctl of the AG        */
+        int dn_agwidth;         /* width in dmapctl of the AG           */
-        int dn_agstart;         /* start tree index at AG height     */
+        int dn_agstart;         /* start tree index at AG height        */
-        int dn_agl2size;        /* l2 num of blks per alloc group    */
+        int dn_agl2size;        /* l2 num of blks per alloc group       */
-        s64 dn_agfree[MAXAG];   /* per AG free count           */
+        s64 dn_agfree[MAXAG];   /* per AG free count                    */
-        s64 dn_agsize;          /* num of blks per alloc group       */
+        s64 dn_agsize;          /* num of blks per alloc group          */
-        signed char dn_maxfreebud;      /* max free buddy system             */
+        signed char dn_maxfreebud;      /* max free buddy system        */
-};                              /* - 4096 -                             */
+};                              /* - 4096 -                             */
 /*
 *      in-memory aggregate disk allocation map descriptor.
 */
diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c
index 6d62f3222892..c14ba3cfa818 100644
--- a/fs/jfs/jfs_dtree.c
+++ b/fs/jfs/jfs_dtree.c
@@ -315,8 +315,8 @@ static inline void lock_index(tid_t tid, struct inode *ip, struct metapage * mp,
        lv = &llck->lv[llck->index];
        /*
-         *      Linelock slot size is twice the size of directory table
+         *      Linelock slot size is twice the size of directory table
-         *      slot size.  512 entries per page.
+         *      slot size.  512 entries per page.
         */
        lv->offset = ((index - 2) & 511) >> 1;
        lv->length = 1;
@@ -615,7 +615,7 @@ int dtSearch(struct inode *ip, struct component_name * key, ino_t * data,
        btstack->nsplit = 1;
        /*
-         *      search down tree from root:
+         *      search down tree from root:
         *
         * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of
         * internal page, child page Pi contains entry with k, Ki <= K < Kj.
@@ -659,7 +659,7 @@ int dtSearch(struct inode *ip, struct component_name * key, ino_t * data,
                        }
                        if (cmp == 0) {
                                /*
-                                 *      search hit
+                                 *      search hit
                                 */
                                /* search hit - leaf page:
                                 * return the entry found
@@ -723,7 +723,7 @@ int dtSearch(struct inode *ip, struct component_name * key, ino_t * data,
                }
                /*
-                 *      search miss
+                 *      search miss
                 *
                 * base is the smallest index with key (Kj) greater than
                 * search key (K) and may be zero or (maxindex + 1) index.
@@ -834,7 +834,7 @@ int dtInsert(tid_t tid, struct inode *ip,
        struct lv *lv;
        /*
-         *      retrieve search result
+         *      retrieve search result
         *
         * dtSearch() returns (leaf page pinned, index at which to insert).
         * n.b. dtSearch() may return index of (maxindex + 1) of
@@ -843,7 +843,7 @@ int dtInsert(tid_t tid, struct inode *ip,
        DT_GETSEARCH(ip, btstack->top, bn, mp, p, index);
        /*
-         *      insert entry for new key
+         *      insert entry for new key
         */
        if (DO_INDEX(ip)) {
                if (JFS_IP(ip)->next_index == DIREND) {
@@ -860,9 +860,9 @@ int dtInsert(tid_t tid, struct inode *ip,
        data.leaf.ino = *fsn;
        /*
-         *      leaf page does not have enough room for new entry:
+         *      leaf page does not have enough room for new entry:
         *
-         *      extend/split the leaf page;
+         *      extend/split the leaf page;
         *
         * dtSplitUp() will insert the entry and unpin the leaf page.
         */
@@ -877,9 +877,9 @@ int dtInsert(tid_t tid, struct inode *ip,
        }
        /*
-         *      leaf page does have enough room for new entry:
+         *      leaf page does have enough room for new entry:
         *
-         *      insert the new data entry into the leaf page;
+         *      insert the new data entry into the leaf page;
         */
        BT_MARK_DIRTY(mp, ip);
        /*
@@ -967,13 +967,13 @@ static int dtSplitUp(tid_t tid,
        }
        /*
-         *      split leaf page
+         *      split leaf page
         *
         * The split routines insert the new entry, and
         * acquire txLock as appropriate.
         */
        /*
-         *      split root leaf page:
+         *      split root leaf page:
         */
        if (sp->header.flag & BT_ROOT) {
                /*
@@ -1012,7 +1012,7 @@ static int dtSplitUp(tid_t tid,
        }
        /*
-         *      extend first leaf page
+         *      extend first leaf page
         *
         * extend the 1st extent if less than buffer page size
         * (dtExtendPage() reurns leaf page unpinned)
@@ -1068,7 +1068,7 @@ static int dtSplitUp(tid_t tid,
        }
        /*
-         *      split leaf page <sp> into <sp> and a new right page <rp>.
+         *      split leaf page <sp> into <sp> and a new right page <rp>.
         *
         * return <rp> pinned and its extent descriptor <rpxd>
         */
@@ -1433,7 +1433,7 @@ static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split,
        rp->header.freecnt = rp->header.maxslot - fsi;
        /*
-         *      sequential append at tail: append without split
+         *      sequential append at tail: append without split
         *
         * If splitting the last page on a level because of appending
         * a entry to it (skip is maxentry), it's likely that the access is
@@ -1467,7 +1467,7 @@ static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split,
        }
        /*
-         *      non-sequential insert (at possibly middle page)
+         *      non-sequential insert (at possibly middle page)
         */
        /*
@@ -1508,7 +1508,7 @@ static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split,
        left = 0;
        /*
-         *      compute fill factor for split pages
+         *      compute fill factor for split pages
         *
         * <nxt> traces the next entry to move to rp
         * <off> traces the next entry to stay in sp
@@ -1551,7 +1551,7 @@ static int dtSplitPage(tid_t tid, struct inode *ip, struct dtsplit * split,
        /* <nxt> poins to the 1st entry to move */
        /*
-         *      move entries to right page
+         *      move entries to right page
         *
         * dtMoveEntry() initializes rp and reserves entry for insertion
         *
@@ -1677,7 +1677,7 @@ static int dtExtendPage(tid_t tid,
                return (rc);
        /*
-         *      extend the extent
+         *      extend the extent
         */
        pxdlist = split->pxdlist;
        pxd = &pxdlist->pxd[pxdlist->npxd];
@@ -1722,7 +1722,7 @@ static int dtExtendPage(tid_t tid,
        }
        /*
-         *      extend the page
+         *      extend the page
         */
        sp->header.self = *pxd;
@@ -1739,9 +1739,6 @@ static int dtExtendPage(tid_t tid,
        /* update buffer extent descriptor of extended page */
        xlen = lengthPXD(pxd);
        xsize = xlen << JFS_SBI(sb)->l2bsize;
-#ifdef _STILL_TO_PORT
-        bmSetXD(smp, xaddr, xsize);
-#endif                          /*  _STILL_TO_PORT */
        /*
         * copy old stbl to new stbl at start of extended area
@@ -1836,7 +1833,7 @@ static int dtExtendPage(tid_t tid,
        }
        /*
-         *      update parent entry on the parent/root page
+         *      update parent entry on the parent/root page
         */
        /*
         * acquire a transaction lock on the parent/root page
@@ -1904,7 +1901,7 @@ static int dtSplitRoot(tid_t tid,
        sp = &JFS_IP(ip)->i_dtroot;
        /*
-         *      allocate/initialize a single (right) child page
+         *      allocate/initialize a single (right) child page
         *
         * N.B. at first split, a one (or two) block to fit new entry
         * is allocated; at subsequent split, a full page is allocated;
@@ -1943,7 +1940,7 @@ static int dtSplitRoot(tid_t tid,
        rp->header.prev = 0;
        /*
-         *      move in-line root page into new right page extent
+         *      move in-line root page into new right page extent
         */
        /* linelock header + copied entries + new stbl (1st slot) in new page */
        ASSERT(dtlck->index == 0);
@@ -2016,7 +2013,7 @@ static int dtSplitRoot(tid_t tid,
        dtInsertEntry(rp, split->index, split->key, split->data, &dtlck);
        /*
-         *      reset parent/root page
+         *      reset parent/root page
         *
         * set the 1st entry offset to 0, which force the left-most key
         * at any level of the tree to be less than any search key.
@@ -2102,7 +2099,7 @@ int dtDelete(tid_t tid,
        dtpage_t *np;
        /*
-         *      search for the entry to delete:
+         *      search for the entry to delete:
         *
         * dtSearch() returns (leaf page pinned, index at which to delete).
         */
@@ -2253,7 +2250,7 @@ static int dtDeleteUp(tid_t tid, struct inode *ip,
        int i;
        /*
-         *      keep the root leaf page which has become empty
+         *      keep the root leaf page which has become empty
         */
        if (BT_IS_ROOT(fmp)) {
                /*
@@ -2269,7 +2266,7 @@ static int dtDeleteUp(tid_t tid, struct inode *ip,
        }
        /*
-         *      free the non-root leaf page
+         *      free the non-root leaf page
         */
        /*
         * acquire a transaction lock on the page
@@ -2299,7 +2296,7 @@ static int dtDeleteUp(tid_t tid, struct inode *ip,
        discard_metapage(fmp);
        /*
-         *      propagate page deletion up the directory tree
+         *      propagate page deletion up the directory tree
         *
         * If the delete from the parent page makes it empty,
         * continue all the way up the tree.
@@ -2440,10 +2437,10 @@ static int dtDeleteUp(tid_t tid, struct inode *ip,
 #ifdef _NOTYET
 /*
- * NAME:        dtRelocate()
+ * NAME:        dtRelocate()
 *
- * FUNCTION:    relocate dtpage (internal or leaf) of directory;
+ * FUNCTION:    relocate dtpage (internal or leaf) of directory;
- *              This function is mainly used by defragfs utility.
+ *              This function is mainly used by defragfs utility.
 */
 int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd,
               s64 nxaddr)
@@ -2471,8 +2468,8 @@ int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd,
                   xlen);
        /*
-         *      1. get the internal parent dtpage covering
+         *      1. get the internal parent dtpage covering
-         *      router entry for the tartget page to be relocated;
+         *      router entry for the tartget page to be relocated;
         */
        rc = dtSearchNode(ip, lmxaddr, opxd, &btstack);
        if (rc)
@@ -2483,7 +2480,7 @@ int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd,
        jfs_info("dtRelocate: parent router entry validated.");
        /*
-         *      2. relocate the target dtpage
+         *      2. relocate the target dtpage
         */
        /* read in the target page from src extent */
        DT_GETPAGE(ip, oxaddr, mp, PSIZE, p, rc);
@@ -2581,9 +2578,7 @@ int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd,
        /* update the buffer extent descriptor of the dtpage */
        xsize = xlen << JFS_SBI(ip->i_sb)->l2bsize;
-#ifdef _STILL_TO_PORT
-        bmSetXD(mp, nxaddr, xsize);
-#endif /* _STILL_TO_PORT */
        /* unpin the relocated page */
        DT_PUTPAGE(mp);
        jfs_info("dtRelocate: target dtpage relocated.");
@@ -2594,7 +2589,7 @@ int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd,
         */
        /*
-         *      3. acquire maplock for the source extent to be freed;
+         *      3. acquire maplock for the source extent to be freed;
         */
        /* for dtpage relocation, write a LOG_NOREDOPAGE record
         * for the source dtpage (logredo() will init NoRedoPage
@@ -2609,7 +2604,7 @@ int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd,
        pxdlock->index = 1;
        /*
-         *      4. update the parent router entry for relocation;
+         *      4. update the parent router entry for relocation;
         *
         * acquire tlck for the parent entry covering the target dtpage;
         * write LOG_REDOPAGE to apply after image only;
@@ -2637,7 +2632,7 @@ int dtRelocate(tid_t tid, struct inode *ip, s64 lmxaddr, pxd_t * opxd,
 * NAME:        dtSearchNode()
 *
 * FUNCTION:    Search for an dtpage containing a specified address
- *              This function is mainly used by defragfs utility.
+ *              This function is mainly used by defragfs utility.
 *
 * NOTE:        Search result on stack, the found page is pinned at exit.
 *              The result page must be an internal dtpage.
@@ -2660,7 +2655,7 @@ static int dtSearchNode(struct inode *ip, s64 lmxaddr, pxd_t * kpxd,
        BT_CLR(btstack);        /* reset stack */
        /*
-         *      descend tree to the level with specified leftmost page
+         *      descend tree to the level with specified leftmost page
         *
         *  by convention, root bn = 0.
         */
@@ -2699,7 +2694,7 @@ static int dtSearchNode(struct inode *ip, s64 lmxaddr, pxd_t * kpxd,
        }
        /*
-         *      search each page at the current levevl
+         *      search each page at the current levevl
         */
      loop:
        stbl = DT_GETSTBL(p);
@@ -3044,9 +3039,9 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
        if (DO_INDEX(ip)) {
                /*
                 * persistent index is stored in directory entries.
-                 * Special cases:        0 = .
+                 * Special cases:        0 = .
-                 *                       1 = ..
+                 *                       1 = ..
-                 *                      -1 = End of directory
+                 *                      -1 = End of directory
                 */
                do_index = 1;
@@ -3128,10 +3123,10 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
                /*
                 * Legacy filesystem - OS/2 & Linux JFS < 0.3.6
                 *
-                 * pn = index = 0:      First entry "."
+                 * pn = index = 0:      First entry "."
-                 * pn = 0; index = 1:   Second entry ".."
+                 * pn = 0; index = 1:   Second entry ".."
-                 * pn > 0:              Real entries, pn=1 -> leftmost page
+                 * pn > 0:              Real entries, pn=1 -> leftmost page
-                 * pn = index = -1:     No more entries
+                 * pn = index = -1:     No more entries
                 */
                dtpos = filp->f_pos;
                if (dtpos == 0) {
@@ -3351,7 +3346,7 @@ static int dtReadFirst(struct inode *ip, struct btstack * btstack)
        BT_CLR(btstack);        /* reset stack */
        /*
-         *      descend leftmost path of the tree
+         *      descend leftmost path of the tree
         *
         * by convention, root bn = 0.
         */
@@ -4531,7 +4526,7 @@ int dtModify(tid_t tid, struct inode *ip,
        struct ldtentry *entry;
        /*
-         *      search for the entry to modify:
+         *      search for the entry to modify:
         *
         * dtSearch() returns (leaf page pinned, index at which to modify).
         */
diff --git a/fs/jfs/jfs_dtree.h b/fs/jfs/jfs_dtree.h
index af8513f78648..8561c6ecece0 100644
--- a/fs/jfs/jfs_dtree.h
+++ b/fs/jfs/jfs_dtree.h
@@ -35,7 +35,7 @@ typedef union {
 /*
- *      entry segment/slot
+ *      entry segment/slot
 *
 * an entry consists of type dependent head/only segment/slot and
 * additional segments/slots linked vi next field;
diff --git a/fs/jfs/jfs_extent.c b/fs/jfs/jfs_extent.c
index a35bdca6a805..7ae1e3281de9 100644
--- a/fs/jfs/jfs_extent.c
+++ b/fs/jfs/jfs_extent.c
@@ -34,8 +34,8 @@ static int extBrealloc(struct inode *, s64, s64, s64 *, s64 *);
 #endif
 static s64 extRoundDown(s64 nb);
-#define DPD(a)          (printk("(a): %d\n",(a)))
+#define DPD(a)          (printk("(a): %d\n",(a)))
-#define DPC(a)          (printk("(a): %c\n",(a)))
+#define DPC(a)          (printk("(a): %c\n",(a)))
 #define DPL1(a)                                 \
 {                                               \
        if ((a) >> 32)                          \
@@ -51,19 +51,19 @@ static s64 extRoundDown(s64 nb);
                printk("(a): %x\n",(a) << 32);  \
 }
-#define DPD1(a)         (printk("(a): %d  ",(a)))
+#define DPD1(a)         (printk("(a): %d  ",(a)))
-#define DPX(a)          (printk("(a): %08x\n",(a)))
+#define DPX(a)          (printk("(a): %08x\n",(a)))
-#define DPX1(a)         (printk("(a): %08x  ",(a)))
+#define DPX1(a)         (printk("(a): %08x  ",(a)))
-#define DPS(a)          (printk("%s\n",(a)))
+#define DPS(a)          (printk("%s\n",(a)))
-#define DPE(a)          (printk("\nENTERING: %s\n",(a)))
+#define DPE(a)          (printk("\nENTERING: %s\n",(a)))
-#define DPE1(a)          (printk("\nENTERING: %s",(a)))
+#define DPE1(a)         (printk("\nENTERING: %s",(a)))
-#define DPS1(a)         (printk("  %s  ",(a)))
+#define DPS1(a)         (printk("  %s  ",(a)))
 /*
 * NAME:        extAlloc()
 *
- * FUNCTION:    allocate an extent for a specified page range within a
+ * FUNCTION:    allocate an extent for a specified page range within a
 *              file.
 *
 * PARAMETERS:
@@ -78,9 +78,9 @@ static s64 extRoundDown(s64 nb);
 *                should be marked as allocated but not recorded.
 *
 * RETURN VALUES:
- *      0       - success
+ *      0       - success
- *      -EIO    - i/o error.
+ *      -EIO    - i/o error.
- *      -ENOSPC - insufficient disk resources.
+ *      -ENOSPC - insufficient disk resources.
 */
 int
 extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, bool abnr)
@@ -192,9 +192,9 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, bool abnr)
 #ifdef _NOTYET
 /*
- * NAME:        extRealloc()
+ * NAME:        extRealloc()
 *
- * FUNCTION:    extend the allocation of a file extent containing a
+ * FUNCTION:    extend the allocation of a file extent containing a
 *              partial back last page.
 *
 * PARAMETERS:
@@ -207,9 +207,9 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, bool abnr)
 *                should be marked as allocated but not recorded.
 *
 * RETURN VALUES:
- *      0       - success
+ *      0       - success
- *      -EIO    - i/o error.
+ *      -EIO    - i/o error.
- *      -ENOSPC - insufficient disk resources.
+ *      -ENOSPC - insufficient disk resources.
 */
 int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, bool abnr)
 {
@@ -345,9 +345,9 @@ exit:
 /*
- * NAME:        extHint()
+ * NAME:        extHint()
 *
- * FUNCTION:    produce an extent allocation hint for a file offset.
+ * FUNCTION:    produce an extent allocation hint for a file offset.
 *
 * PARAMETERS:
 *      ip      - the inode of the file.
@@ -356,8 +356,8 @@ exit:
 *                the hint.
 *
 * RETURN VALUES:
- *      0       - success
+ *      0       - success
- *      -EIO    - i/o error.
+ *      -EIO    - i/o error.
 */
 int extHint(struct inode *ip, s64 offset, xad_t * xp)
 {
@@ -387,7 +387,7 @@ int extHint(struct inode *ip, s64 offset, xad_t * xp)
        lxdl.nlxd = 1;
        lxdl.lxd = &lxd;
        LXDoffset(&lxd, prev)
-            LXDlength(&lxd, nbperpage);
+        LXDlength(&lxd, nbperpage);
        xadl.maxnxad = 1;
        xadl.nxad = 0;
@@ -397,11 +397,11 @@ int extHint(struct inode *ip, s64 offset, xad_t * xp)
        if ((rc = xtLookupList(ip, &lxdl, &xadl, 0)))
                return (rc);
-        /* check if not extent exists for the previous page.
+        /* check if no extent exists for the previous page.
         * this is possible for sparse files.
         */
        if (xadl.nxad == 0) {
-//              assert(ISSPARSE(ip));
+//              assert(ISSPARSE(ip));
                return (0);
        }
@@ -410,28 +410,28 @@ int extHint(struct inode *ip, s64 offset, xad_t * xp)
         */
        xp->flag &= XAD_NOTRECORDED;
-        if(xadl.nxad != 1 || lengthXAD(xp) != nbperpage) {
+        if(xadl.nxad != 1 || lengthXAD(xp) != nbperpage) {
                jfs_error(ip->i_sb, "extHint: corrupt xtree");
                return -EIO;
-        }
+        }
        return (0);
 }
 /*
- * NAME:        extRecord()
+ * NAME:        extRecord()
 *
- * FUNCTION:    change a page with a file from not recorded to recorded.
+ * FUNCTION:    change a page with a file from not recorded to recorded.
 *
 * PARAMETERS:
 *      ip      - inode of the file.
 *      cp      - cbuf of the file page.
 *
 * RETURN VALUES:
- *      0       - success
+ *      0       - success
- *      -EIO    - i/o error.
+ *      -EIO    - i/o error.
- *      -ENOSPC - insufficient disk resources.
+ *      -ENOSPC - insufficient disk resources.
 */
 int extRecord(struct inode *ip, xad_t * xp)
 {
@@ -451,9 +451,9 @@ int extRecord(struct inode *ip, xad_t * xp)
 #ifdef _NOTYET
 /*
- * NAME:        extFill()
+ * NAME:        extFill()
 *
- * FUNCTION:    allocate disk space for a file page that represents
+ * FUNCTION:    allocate disk space for a file page that represents
 *              a file hole.
 *
 * PARAMETERS:
@@ -461,16 +461,16 @@ int extRecord(struct inode *ip, xad_t * xp)
 *      cp      - cbuf of the file page represent the hole.
 *
 * RETURN VALUES:
- *      0       - success
+ *      0       - success
- *      -EIO    - i/o error.
+ *      -EIO    - i/o error.
- *      -ENOSPC - insufficient disk resources.
+ *      -ENOSPC - insufficient disk resources.
 */
 int extFill(struct inode *ip, xad_t * xp)
 {
        int rc, nbperpage = JFS_SBI(ip->i_sb)->nbperpage;
        s64 blkno = offsetXAD(xp) >> ip->i_blkbits;
-//      assert(ISSPARSE(ip));
+//      assert(ISSPARSE(ip));
        /* initialize the extent allocation hint */
        XADaddress(xp, 0);
@@ -489,7 +489,7 @@ int extFill(struct inode *ip, xad_t * xp)
 /*
 * NAME:        extBalloc()
 *
- * FUNCTION:    allocate disk blocks to form an extent.
+ * FUNCTION:    allocate disk blocks to form an extent.
 *
 *              initially, we will try to allocate disk blocks for the
 *              requested size (nblocks).  if this fails (nblocks
@@ -513,9 +513,9 @@ int extFill(struct inode *ip, xad_t * xp)
 *                 allocated block range.
 *
 * RETURN VALUES:
- *      0       - success
+ *      0       - success
- *      -EIO    - i/o error.
+ *      -EIO    - i/o error.
- *      -ENOSPC - insufficient disk resources.
+ *      -ENOSPC - insufficient disk resources.
 */
 static int
 extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno)
@@ -580,7 +580,7 @@ extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno)
 /*
 * NAME:        extBrealloc()
 *
- * FUNCTION:    attempt to extend an extent's allocation.
+ * FUNCTION:    attempt to extend an extent's allocation.
 *
 *              Initially, we will try to extend the extent's allocation
 *              in place.  If this fails, we'll try to move the extent
@@ -597,8 +597,8 @@ extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno)
 *
 * PARAMETERS:
 *      ip       - the inode of the file.
- *      blkno    - starting block number of the extents current allocation.
+ *      blkno    - starting block number of the extents current allocation.
- *      nblks    - number of blocks within the extents current allocation.
+ *      nblks    - number of blocks within the extents current allocation.
 *      newnblks - pointer to a s64 value.  on entry, this value is the
 *                 the new desired extent size (number of blocks).  on
 *                 successful exit, this value is set to the extent's actual
@@ -606,9 +606,9 @@ extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno)
 *      newblkno - the starting block number of the extents new allocation.
 *
 * RETURN VALUES:
- *      0       - success
+ *      0       - success
- *      -EIO    - i/o error.
+ *      -EIO    - i/o error.
- *      -ENOSPC - insufficient disk resources.
+ *      -ENOSPC - insufficient disk resources.
 */
 static int
 extBrealloc(struct inode *ip,
@@ -634,16 +634,16 @@ extBrealloc(struct inode *ip,
 /*
- * NAME:        extRoundDown()
+ * NAME:        extRoundDown()
 *
- * FUNCTION:    round down a specified number of blocks to the next
+ * FUNCTION:    round down a specified number of blocks to the next
 *              smallest power of 2 number.
 *
 * PARAMETERS:
 *      nb      - the inode of the file.
 *
 * RETURN VALUES:
- *      next smallest power of 2 number.
+ *      next smallest power of 2 number.
 */
 static s64 extRoundDown(s64 nb)
 {
diff --git a/fs/jfs/jfs_filsys.h b/fs/jfs/jfs_filsys.h
index 38f70ac03bec..b3f5463fbe52 100644
--- a/fs/jfs/jfs_filsys.h
+++ b/fs/jfs/jfs_filsys.h
@@ -34,9 +34,9 @@
 #define JFS_UNICODE     0x00000001      /* unicode name */
 /* mount time flags for error handling */
-#define JFS_ERR_REMOUNT_RO 0x00000002   /* remount read-only */
+#define JFS_ERR_REMOUNT_RO 0x00000002   /* remount read-only */
-#define JFS_ERR_CONTINUE   0x00000004   /* continue */
+#define JFS_ERR_CONTINUE   0x00000004   /* continue */
-#define JFS_ERR_PANIC      0x00000008   /* panic */
+#define JFS_ERR_PANIC      0x00000008   /* panic */
 /* Quota support */
 #define JFS_USRQUOTA    0x00000010
@@ -83,7 +83,6 @@
 /*      case-insensitive name/directory support */
 #define JFS_AIX         0x80000000      /* AIX support */
-/*      POSIX name/directory  support - Never implemented*/
 /*
 *      buffer cache configuration
@@ -113,10 +112,10 @@
 #define IDATASIZE       256     /* inode inline data size */
 #define IXATTRSIZE      128     /* inode inline extended attribute size */
-#define XTPAGE_SIZE     4096
+#define XTPAGE_SIZE     4096
-#define log2_PAGESIZE     12
+#define log2_PAGESIZE   12
-#define IAG_SIZE        4096
+#define IAG_SIZE        4096
 #define IAG_EXTENT_SIZE 4096
 #define INOSPERIAG      4096    /* number of disk inodes per iag */
 #define L2INOSPERIAG    12      /* l2 number of disk inodes per iag */
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index c6530227cda6..3870ba8b9086 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -93,21 +93,21 @@ static int copy_from_dinode(struct dinode *, struct inode *);
 static void copy_to_dinode(struct dinode *, struct inode *);
 /*
- * NAME:        diMount()
+ * NAME:        diMount()
 *
- * FUNCTION:    initialize the incore inode map control structures for
+ * FUNCTION:    initialize the incore inode map control structures for
 *              a fileset or aggregate init time.
 *
- *              the inode map's control structure (dinomap) is
+ *              the inode map's control structure (dinomap) is
- *              brought in from disk and placed in virtual memory.
+ *              brought in from disk and placed in virtual memory.
 *
 * PARAMETERS:
- *      ipimap  - pointer to inode map inode for the aggregate or fileset.
+ *      ipimap  - pointer to inode map inode for the aggregate or fileset.
 *
 * RETURN VALUES:
- *      0       - success
+ *      0       - success
- *      -ENOMEM  - insufficient free virtual memory.
+ *      -ENOMEM - insufficient free virtual memory.
- *      -EIO    - i/o error.
+ *      -EIO    - i/o error.
 */
 int diMount(struct inode *ipimap)
 {
@@ -180,18 +180,18 @@ int diMount(struct inode *ipimap)
 /*
- * NAME:        diUnmount()
+ * NAME:        diUnmount()
 *
- * FUNCTION:    write to disk the incore inode map control structures for
+ * FUNCTION:    write to disk the incore inode map control structures for
 *              a fileset or aggregate at unmount time.
 *
 * PARAMETERS:
- *      ipimap  - pointer to inode map inode for the aggregate or fileset.
+ *      ipimap  - pointer to inode map inode for the aggregate or fileset.
 *
 * RETURN VALUES:
- *      0       - success
+ *      0       - success
- *      -ENOMEM  - insufficient free virtual memory.
+ *      -ENOMEM - insufficient free virtual memory.
- *      -EIO    - i/o error.
+ *      -EIO    - i/o error.
 */
 int diUnmount(struct inode *ipimap, int mounterror)
 {
@@ -274,9 +274,9 @@ int diSync(struct inode *ipimap)
 /*
- * NAME:        diRead()
+ * NAME:        diRead()
 *
- * FUNCTION:    initialize an incore inode from disk.
+ * FUNCTION:    initialize an incore inode from disk.
 *
 *              on entry, the specifed incore inode should itself
 *              specify the disk inode number corresponding to the
@@ -285,7 +285,7 @@ int diSync(struct inode *ipimap)
 *              this routine handles incore inode initialization for
 *              both "special" and "regular" inodes.  special inodes
 *              are those required early in the mount process and
- *              require special handling since much of the file system
+ *              require special handling since much of the file system
 *              is not yet initialized.  these "special" inodes are
 *              identified by a NULL inode map inode pointer and are
 *              actually initialized by a call to diReadSpecial().
@@ -298,12 +298,12 @@ int diSync(struct inode *ipimap)
 *              incore inode.
 *
 * PARAMETERS:
- *      ip  -  pointer to incore inode to be initialized from disk.
+ *      ip      -  pointer to incore inode to be initialized from disk.
 *
 * RETURN VALUES:
- *      0       - success
+ *      0       - success
- *      -EIO    - i/o error.
+ *      -EIO    - i/o error.
- *      -ENOMEM - insufficient memory
+ *      -ENOMEM - insufficient memory
 *
 */
 int diRead(struct inode *ip)
@@ -410,26 +410,26 @@ int diRead(struct inode *ip)
 /*
- * NAME:        diReadSpecial()
+ * NAME:        diReadSpecial()
 *
- * FUNCTION:    initialize a 'special' inode from disk.
+ * FUNCTION:    initialize a 'special' inode from disk.
 *
 *              this routines handles aggregate level inodes.  The
 *              inode cache cannot differentiate between the
 *              aggregate inodes and the filesystem inodes, so we
 *              handle these here.  We don't actually use the aggregate
- *              inode map, since these inodes are at a fixed location
+ *              inode map, since these inodes are at a fixed location
 *              and in some cases the aggregate inode map isn't initialized
 *              yet.
 *
 * PARAMETERS:
- *      sb - filesystem superblock
+ *      sb - filesystem superblock
 *      inum - aggregate inode number
 *      secondary - 1 if secondary aggregate inode table
 *
 * RETURN VALUES:
- *      new inode       - success
+ *      new inode       - success
- *      NULL            - i/o error.
+ *      NULL            - i/o error.
 */
 struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary)
 {
@@ -502,12 +502,12 @@ struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary)
 }
 /*
- * NAME:        diWriteSpecial()
+ * NAME:        diWriteSpecial()
 *
- * FUNCTION:    Write the special inode to disk
+ * FUNCTION:    Write the special inode to disk
 *
 * PARAMETERS:
- *      ip - special inode
+ *      ip - special inode
 *      secondary - 1 if secondary aggregate inode table
 *
 * RETURN VALUES: none
@@ -554,9 +554,9 @@ void diWriteSpecial(struct inode *ip, int secondary)
 }
 /*
- * NAME:        diFreeSpecial()
+ * NAME:        diFreeSpecial()
 *
- * FUNCTION:    Free allocated space for special inode
+ * FUNCTION:    Free allocated space for special inode
 */
 void diFreeSpecial(struct inode *ip)
 {
@@ -572,9 +572,9 @@ void diFreeSpecial(struct inode *ip)
 /*
- * NAME:        diWrite()
+ * NAME:        diWrite()
 *
- * FUNCTION:    write the on-disk inode portion of the in-memory inode
+ * FUNCTION:    write the on-disk inode portion of the in-memory inode
 *              to its corresponding on-disk inode.
 *
 *              on entry, the specifed incore inode should itself
@@ -589,11 +589,11 @@ void diFreeSpecial(struct inode *ip)
 *
 * PARAMETERS:
 *      tid -  transacation id
- *      ip  -  pointer to incore inode to be written to the inode extent.
+ *      ip  -  pointer to incore inode to be written to the inode extent.
 *
 * RETURN VALUES:
- *      0       - success
+ *      0       - success
- *      -EIO    - i/o error.
+ *      -EIO    - i/o error.
 */
 int diWrite(tid_t tid, struct inode *ip)
 {
@@ -730,7 +730,7 @@ int diWrite(tid_t tid, struct inode *ip)
        ilinelock = (struct linelock *) & tlck->lock;
        /*
-         *      regular file: 16 byte (XAD slot) granularity
+         *      regular file: 16 byte (XAD slot) granularity
         */
        if (type & tlckXTREE) {
                xtpage_t *p, *xp;
@@ -755,7 +755,7 @@ int diWrite(tid_t tid, struct inode *ip)
                                xad->flag &= ~(XAD_NEW | XAD_EXTENDED);
        }
        /*
-         *      directory: 32 byte (directory entry slot) granularity
+         *      directory: 32 byte (directory entry slot) granularity
         */
        else if (type & tlckDTREE) {
                dtpage_t *p, *xp;
@@ -800,9 +800,8 @@ int diWrite(tid_t tid, struct inode *ip)
        }
        /*
-         *      lock/copy inode base: 128 byte slot granularity
+         *      lock/copy inode base: 128 byte slot granularity
         */
-// baseDinode:
        lv = & dilinelock->lv[dilinelock->index];
        lv->offset = dioffset >> L2INODESLOTSIZE;
        copy_to_dinode(dp, ip);
@@ -813,17 +812,6 @@ int diWrite(tid_t tid, struct inode *ip)
                lv->length = 1;
        dilinelock->index++;
-#ifdef _JFS_FASTDASD
-        /*
-         * We aren't logging changes to the DASD used in directory inodes,
-         * but we need to write them to disk.  If we don't unmount cleanly,
-         * mount will recalculate the DASD used.
-         */
-        if (S_ISDIR(ip->i_mode)
-            && (ip->i_ipmnt->i_mntflag & JFS_DASD_ENABLED))
-                memcpy(&dp->di_DASD, &ip->i_DASD, sizeof(struct dasd));
-#endif                          /*  _JFS_FASTDASD */
        /* release the buffer holding the updated on-disk inode.
         * the buffer will be later written by commit processing.
         */
@@ -834,9 +822,9 @@ int diWrite(tid_t tid, struct inode *ip)
 /*
- * NAME:        diFree(ip)
+ * NAME:        diFree(ip)
 *
- * FUNCTION:    free a specified inode from the inode working map
+ * FUNCTION:    free a specified inode from the inode working map
 *              for a fileset or aggregate.
 *
 *              if the inode to be freed represents the first (only)
@@ -865,11 +853,11 @@ int diWrite(tid_t tid, struct inode *ip)
 *              any updates and are held until all updates are complete.
 *
 * PARAMETERS:
- *      ip      - inode to be freed.
+ *      ip      - inode to be freed.
 *
 * RETURN VALUES:
- *      0       - success
+ *      0       - success
- *      -EIO    - i/o error.
+ *      -EIO    - i/o error.
 */
 int diFree(struct inode *ip)
 {
@@ -902,7 +890,8 @@ int diFree(struct inode *ip)
         * the map.
         */
        if (iagno >= imap->im_nextiag) {
-                dump_mem("imap", imap, 32);
+                print_hex_dump(KERN_ERR, "imap: ", DUMP_PREFIX_ADDRESS, 16, 4,
+                               imap, 32, 0);
                jfs_error(ip->i_sb,
                          "diFree: inum = %d, iagno = %d, nextiag = %d",
                          (uint) inum, iagno, imap->im_nextiag);
@@ -964,8 +953,8 @@ int diFree(struct inode *ip)
                return -EIO;
        }
        /*
-         *      inode extent still has some inodes or below low water mark:
+         *      inode extent still has some inodes or below low water mark:
-         *      keep the inode extent;
+         *      keep the inode extent;
         */
        if (bitmap ||
            imap->im_agctl[agno].numfree < 96 ||
@@ -1047,12 +1036,12 @@ int diFree(struct inode *ip)
        /*
-         *      inode extent has become free and above low water mark:
+         *      inode extent has become free and above low water mark:
-         *      free the inode extent;
+         *      free the inode extent;
         */
        /*
-         *      prepare to update iag list(s) (careful update step 1)
+         *      prepare to update iag list(s) (careful update step 1)
         */
        amp = bmp = cmp = dmp = NULL;
        fwd = back = -1;
@@ -1152,7 +1141,7 @@ int diFree(struct inode *ip)
        invalidate_pxd_metapages(ip, freepxd);
        /*
-         *      update iag list(s) (careful update step 2)
+         *      update iag list(s) (careful update step 2)
         */
        /* add the iag to the ag extent free list if this is the
         * first free extent for the iag.
@@ -1338,20 +1327,20 @@ diInitInode(struct inode *ip, int iagno, int ino, int extno, struct iag * iagp)
 /*
- * NAME:        diAlloc(pip,dir,ip)
+ * NAME:        diAlloc(pip,dir,ip)
 *
- * FUNCTION:    allocate a disk inode from the inode working map
+ * FUNCTION:    allocate a disk inode from the inode working map
 *              for a fileset or aggregate.
 *
 * PARAMETERS:
- *      pip     - pointer to incore inode for the parent inode.
+ *      pip     - pointer to incore inode for the parent inode.
- *      dir     - 'true' if the new disk inode is for a directory.
+ *      dir     - 'true' if the new disk inode is for a directory.
- *      ip      - pointer to a new inode
+ *      ip      - pointer to a new inode
 *
 * RETURN VALUES:
- *      0       - success.
+ *      0       - success.
- *      -ENOSPC - insufficient disk resources.
+ *      -ENOSPC - insufficient disk resources.
- *      -EIO    - i/o error.
+ *      -EIO    - i/o error.
 */
 int diAlloc(struct inode *pip, bool dir, struct inode *ip)
 {
@@ -1433,7 +1422,7 @@ int diAlloc(struct inode *pip, bool dir, struct inode *ip)
        addext = (imap->im_agctl[agno].numfree < 32 && iagp->nfreeexts);
        /*
-         *      try to allocate from the IAG
+         *      try to allocate from the IAG
         */
        /* check if the inode may be allocated from the iag
         * (i.e. the inode has free inodes or new extent can be added).
@@ -1633,9 +1622,9 @@ int diAlloc(struct inode *pip, bool dir, struct inode *ip)
 /*
- * NAME:        diAllocAG(imap,agno,dir,ip)
+ * NAME:        diAllocAG(imap,agno,dir,ip)
 *
- * FUNCTION:    allocate a disk inode from the allocation group.
+ * FUNCTION:    allocate a disk inode from the allocation group.
 *
 *              this routine first determines if a new extent of free
 *              inodes should be added for the allocation group, with
@@ -1649,17 +1638,17 @@ int diAlloc(struct inode *pip, bool dir, struct inode *ip)
 * PRE CONDITION: Already have the AG lock for this AG.
 *
 * PARAMETERS:
- *      imap    - pointer to inode map control structure.
+ *      imap    - pointer to inode map control structure.
- *      agno    - allocation group to allocate from.
+ *      agno    - allocation group to allocate from.
- *      dir     - 'true' if the new disk inode is for a directory.
+ *      dir     - 'true' if the new disk inode is for a directory.
- *      ip      - pointer to the new inode to be filled in on successful return
+ *      ip      - pointer to the new inode to be filled in on successful return
 *                with the disk inode number allocated, its extent address
 *                and the start of the ag.
 *
 * RETURN VALUES:
- *      0       - success.
+ *      0       - success.
- *      -ENOSPC - insufficient disk resources.
+ *      -ENOSPC - insufficient disk resources.
- *      -EIO    - i/o error.
+ *      -EIO    - i/o error.
 */
 static int
 diAllocAG(struct inomap * imap, int agno, bool dir, struct inode *ip)
@@ -1709,9 +1698,9 @@ diAllocAG(struct inomap * imap, int agno, bool dir, struct inode *ip)
 /*
- * NAME:        diAllocAny(imap,agno,dir,iap)
+ * NAME:        diAllocAny(imap,agno,dir,iap)
 *
- * FUNCTION:    allocate a disk inode from any other allocation group.
+ * FUNCTION:    allocate a disk inode from any other allocation group.
 *
 *              this routine is called when an allocation attempt within
 *              the primary allocation group has failed. if attempts to
@@ -1719,17 +1708,17 @@ diAllocAG(struct inomap * imap, int agno, bool dir, struct inode *ip)
 *              specified primary group.
 *
 * PARAMETERS:
- *      imap    - pointer to inode map control structure.
+ *      imap    - pointer to inode map control structure.
- *      agno    - primary allocation group (to avoid).
+ *      agno    - primary allocation group (to avoid).
- *      dir     - 'true' if the new disk inode is for a directory.
+ *      dir     - 'true' if the new disk inode is for a directory.
- *      ip      - pointer to a new inode to be filled in on successful return
+ *      ip      - pointer to a new inode to be filled in on successful return
 *                with the disk inode number allocated, its extent address
 *                and the start of the ag.
 *
 * RETURN VALUES:
- *      0       - success.
+ *      0       - success.
- *      -ENOSPC - insufficient disk resources.
+ *      -ENOSPC - insufficient disk resources.
- *      -EIO    - i/o error.
+ *      -EIO    - i/o error.
 */
 static int
 diAllocAny(struct inomap * imap, int agno, bool dir, struct inode *ip)
@@ -1772,9 +1761,9 @@ diAllocAny(struct inomap * imap, int agno, bool dir, struct inode *ip)
 /*
- * NAME:        diAllocIno(imap,agno,ip)
+ * NAME:        diAllocIno(imap,agno,ip)
 *
- * FUNCTION:    allocate a disk inode from the allocation group's free
+ * FUNCTION:    allocate a disk inode from the allocation group's free
 *              inode list, returning an error if this free list is
 *              empty (i.e. no iags on the list).
 *
@@ -1785,16 +1774,16 @@ diAllocAny(struct inomap * imap, int agno, bool dir, struct inode *ip)
 * PRE CONDITION: Already have AG lock for this AG.
 *
 * PARAMETERS:
- *      imap    - pointer to inode map control structure.
+ *      imap    - pointer to inode map control structure.
- *      agno    - allocation group.
+ *      agno    - allocation group.
- *      ip      - pointer to new inode to be filled in on successful return
+ *      ip      - pointer to new inode to be filled in on successful return
 *                with the disk inode number allocated, its extent address
 *                and the start of the ag.
 *
 * RETURN VALUES:
- *      0       - success.
+ *      0       - success.
- *      -ENOSPC - insufficient disk resources.
+ *      -ENOSPC - insufficient disk resources.
- *      -EIO    - i/o error.
+ *      -EIO    - i/o error.
 */
 static int diAllocIno(struct inomap * imap, int agno, struct inode *ip)
 {
@@ -1890,7 +1879,7 @@ static int diAllocIno(struct inomap * imap, int agno, struct inode *ip)
 /*
- * NAME:        diAllocExt(imap,agno,ip)
+ * NAME:        diAllocExt(imap,agno,ip)
 *
 * FUNCTION:    add a new extent of free inodes to an iag, allocating
 *              an inode from this extent to satisfy the current allocation
@@ -1910,16 +1899,16 @@ static int diAllocIno(struct inomap * imap, int agno, struct inode *ip)
 *              for the purpose of satisfying this request.
 *
 * PARAMETERS:
- *      imap    - pointer to inode map control structure.
+ *      imap    - pointer to inode map control structure.
- *      agno    - allocation group number.
+ *      agno    - allocation group number.
- *      ip      - pointer to new inode to be filled in on successful return
+ *      ip      - pointer to new inode to be filled in on successful return
 *                with the disk inode number allocated, its extent address
 *                and the start of the ag.
 *
 * RETURN VALUES:
- *      0       - success.
+ *      0       - success.
- *      -ENOSPC - insufficient disk resources.
+ *      -ENOSPC - insufficient disk resources.
- *      -EIO    - i/o error.
+ *      -EIO    - i/o error.
 */
 static int diAllocExt(struct inomap * imap, int agno, struct inode *ip)
 {
@@ -2010,7 +1999,7 @@ static int diAllocExt(struct inomap * imap, int agno, struct inode *ip)
 /*
- * NAME:        diAllocBit(imap,iagp,ino)
+ * NAME:        diAllocBit(imap,iagp,ino)
 *
 * FUNCTION:    allocate a backed inode from an iag.
 *
@@ -2030,14 +2019,14 @@ static int diAllocExt(struct inomap * imap, int agno, struct inode *ip)
 *      this AG.  Must have read lock on imap inode.
 *
 * PARAMETERS:
- *      imap    - pointer to inode map control structure.
+ *      imap    - pointer to inode map control structure.
- *      iagp    - pointer to iag.
+ *      iagp    - pointer to iag.
- *      ino     - inode number to be allocated within the iag.
+ *      ino     - inode number to be allocated within the iag.
 *
 * RETURN VALUES:
- *      0       - success.
+ *      0       - success.
- *      -ENOSPC - insufficient disk resources.
+ *      -ENOSPC - insufficient disk resources.
- *      -EIO    - i/o error.
+ *      -EIO    - i/o error.
 */
 static int diAllocBit(struct inomap * imap, struct iag * iagp, int ino)
 {
@@ -2144,11 +2133,11 @@ static int diAllocBit(struct inomap * imap, struct iag * iagp, int ino)
 /*
- * NAME:        diNewExt(imap,iagp,extno)
+ * NAME:        diNewExt(imap,iagp,extno)
 *
- * FUNCTION:    initialize a new extent of inodes for an iag, allocating
+ * FUNCTION:    initialize a new extent of inodes for an iag, allocating
- *              the first inode of the extent for use for the current
+ *              the first inode of the extent for use for the current
- *              allocation request.
+ *              allocation request.
 *
 *              disk resources are allocated for the new extent of inodes
 *              and the inodes themselves are initialized to reflect their
@@ -2177,14 +2166,14 @@ static int diAllocBit(struct inomap * imap, struct iag * iagp, int ino)
 *      this AG.  Must have read lock on imap inode.
 *
 * PARAMETERS:
- *      imap    - pointer to inode map control structure.
+ *      imap    - pointer to inode map control structure.
- *      iagp    - pointer to iag.
+ *      iagp    - pointer to iag.
- *      extno   - extent number.
+ *      extno   - extent number.
 *
 * RETURN VALUES:
- *      0       - success.
+ *      0       - success.
- *      -ENOSPC - insufficient disk resources.
+ *      -ENOSPC - insufficient disk resources.
- *      -EIO    - i/o error.
+ *      -EIO    - i/o error.
 */
 static int diNewExt(struct inomap * imap, struct iag * iagp, int extno)
 {
@@ -2430,7 +2419,7 @@ static int diNewExt(struct inomap * imap, struct iag * iagp, int extno)
 /*
- * NAME:        diNewIAG(imap,iagnop,agno)
+ * NAME:        diNewIAG(imap,iagnop,agno)
 *
 * FUNCTION:    allocate a new iag for an allocation group.
 *
@@ -2443,16 +2432,16 @@ static int diNewExt(struct inomap * imap, struct iag * iagp, int extno)
 *              and returned to satisfy the request.
 *
 * PARAMETERS:
- *      imap    - pointer to inode map control structure.
+ *      imap    - pointer to inode map control structure.
- *      iagnop  - pointer to an iag number set with the number of the
+ *      iagnop  - pointer to an iag number set with the number of the
 *                newly allocated iag upon successful return.
- *      agno    - allocation group number.
+ *      agno    - allocation group number.
 *      mpp     - Buffer pointer to be filled in with new IAG's buffer
 *
 * RETURN VALUES:
- *      0       - success.
+ *      0       - success.
- *      -ENOSPC - insufficient disk resources.
+ *      -ENOSPC - insufficient disk resources.
- *      -EIO    - i/o error.
+ *      -EIO    - i/o error.
 *
 * serialization:
 *      AG lock held on entry/exit;
@@ -2461,7 +2450,7 @@ static int diNewExt(struct inomap * imap, struct iag * iagp, int extno)
 *
 * note: new iag transaction:
 * . synchronously write iag;
- * . write log of xtree and inode  of imap;
+ * . write log of xtree and inode of imap;
 * . commit;
 * . synchronous write of xtree (right to left, bottom to top);
 * . at start of logredo(): init in-memory imap with one additional iag page;
@@ -2481,9 +2470,6 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp)
        s64 xaddr = 0;
        s64 blkno;
        tid_t tid;
-#ifdef _STILL_TO_PORT
-        xad_t xad;
-#endif                          /*  _STILL_TO_PORT */
        struct inode *iplist[1];
        /* pick up pointers to the inode map and mount inodes */
@@ -2674,15 +2660,15 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp)
 }
 /*
- * NAME:        diIAGRead()
+ * NAME:        diIAGRead()
 *
- * FUNCTION:    get the buffer for the specified iag within a fileset
+ * FUNCTION:    get the buffer for the specified iag within a fileset
 *              or aggregate inode map.
 *
 * PARAMETERS:
- *      imap    - pointer to inode map control structure.
+ *      imap    - pointer to inode map control structure.
- *      iagno   - iag number.
+ *      iagno   - iag number.
- *      bpp     - point to buffer pointer to be filled in on successful
+ *      bpp     - point to buffer pointer to be filled in on successful
 *                exit.
 *
 * SERIALIZATION:
@@ -2691,8 +2677,8 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp)
 *       the read lock is unnecessary.)
 *
 * RETURN VALUES:
- *      0       - success.
+ *      0       - success.
- *      -EIO    - i/o error.
+ *      -EIO    - i/o error.
 */
 static int diIAGRead(struct inomap * imap, int iagno, struct metapage ** mpp)
 {
@@ -2712,17 +2698,17 @@ static int diIAGRead(struct inomap * imap, int iagno, struct metapage ** mpp)
 }
 /*
- * NAME:        diFindFree()
+ * NAME:        diFindFree()
 *
- * FUNCTION:    find the first free bit in a word starting at
+ * FUNCTION:    find the first free bit in a word starting at
 *              the specified bit position.
 *
 * PARAMETERS:
- *      word    - word to be examined.
+ *      word    - word to be examined.
- *      start   - starting bit position.
+ *      start   - starting bit position.
 *
 * RETURN VALUES:
- *      bit position of first free bit in the word or 32 if
+ *      bit position of first free bit in the word or 32 if
 *      no free bits were found.
 */
 static int diFindFree(u32 word, int start)
@@ -2897,7 +2883,7 @@ int diExtendFS(struct inode *ipimap, struct inode *ipbmap)
                   atomic_read(&imap->im_numfree));
        /*
-         *      reconstruct imap
+         *      reconstruct imap
         *
         * coalesce contiguous k (newAGSize/oldAGSize) AGs;
         * i.e., (AGi, ..., AGj) where i = k*n and j = k*(n+1) - 1 to AGn;
@@ -2913,7 +2899,7 @@ int diExtendFS(struct inode *ipimap, struct inode *ipbmap)
        }
        /*
-         *      process each iag page of the map.
+         *      process each iag page of the map.
         *
         * rebuild AG Free Inode List, AG Free Inode Extent List;
         */
@@ -2932,7 +2918,7 @@ int diExtendFS(struct inode *ipimap, struct inode *ipbmap)
                /* leave free iag in the free iag list */
                if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) {
-                        release_metapage(bp);
+                        release_metapage(bp);
                        continue;
                }
@@ -3063,13 +3049,13 @@ static void duplicateIXtree(struct super_block *sb, s64 blkno,
 }
 /*
- * NAME:        copy_from_dinode()
+ * NAME:        copy_from_dinode()
 *
- * FUNCTION:    Copies inode info from disk inode to in-memory inode
+ * FUNCTION:    Copies inode info from disk inode to in-memory inode
 *
 * RETURN VALUES:
- *      0       - success
+ *      0       - success
- *      -ENOMEM - insufficient memory
+ *      -ENOMEM - insufficient memory
 */
 static int copy_from_dinode(struct dinode * dip, struct inode *ip)
 {
@@ -3151,9 +3137,9 @@ static int copy_from_dinode(struct dinode * dip, struct inode *ip)
 }
 /*
- * NAME:        copy_to_dinode()
+ * NAME:        copy_to_dinode()
 *
- * FUNCTION:    Copies inode info from in-memory inode to disk inode
+ * FUNCTION:    Copies inode info from in-memory inode to disk inode
 */
 static void copy_to_dinode(struct dinode * dip, struct inode *ip)
 {
diff --git a/fs/jfs/jfs_imap.h b/fs/jfs/jfs_imap.h
index 4f9c346ed498..610a0e9d8941 100644
--- a/fs/jfs/jfs_imap.h
+++ b/fs/jfs/jfs_imap.h
@@ -24,17 +24,17 @@
 *      jfs_imap.h: disk inode manager
 */
-#define EXTSPERIAG      128     /* number of disk inode extent per iag  */
+#define EXTSPERIAG      128     /* number of disk inode extent per iag  */
-#define IMAPBLKNO       0       /* lblkno of dinomap within inode map   */
+#define IMAPBLKNO       0       /* lblkno of dinomap within inode map   */
-#define SMAPSZ          4       /* number of words per summary map      */
+#define SMAPSZ          4       /* number of words per summary map      */
 #define EXTSPERSUM      32      /* number of extents per summary map entry */
 #define L2EXTSPERSUM    5       /* l2 number of extents per summary map */
 #define PGSPERIEXT      4       /* number of 4K pages per dinode extent */
-#define MAXIAGS         ((1<<20)-1)     /* maximum number of iags       */
+#define MAXIAGS         ((1<<20)-1)     /* maximum number of iags       */
-#define MAXAG           128     /* maximum number of allocation groups  */
+#define MAXAG           128     /* maximum number of allocation groups  */
-#define AMAPSIZE      512       /* bytes in the IAG allocation maps */
+#define AMAPSIZE        512     /* bytes in the IAG allocation maps */
-#define SMAPSIZE      16        /* bytes in the IAG summary maps */
+#define SMAPSIZE        16      /* bytes in the IAG summary maps */
 /* convert inode number to iag number */
 #define INOTOIAG(ino)   ((ino) >> L2INOSPERIAG)
@@ -60,31 +60,31 @@
 *      inode allocation group page (per 4096 inodes of an AG)
 */
 struct iag {
-        __le64 agstart;         /* 8: starting block of ag              */
+        __le64 agstart;         /* 8: starting block of ag              */
-        __le32 iagnum;          /* 4: inode allocation group number     */
+        __le32 iagnum;          /* 4: inode allocation group number     */
-        __le32 inofreefwd;      /* 4: ag inode free list forward        */
+        __le32 inofreefwd;      /* 4: ag inode free list forward        */
-        __le32 inofreeback;     /* 4: ag inode free list back           */
+        __le32 inofreeback;     /* 4: ag inode free list back           */
-        __le32 extfreefwd;      /* 4: ag inode extent free list forward */
+        __le32 extfreefwd;      /* 4: ag inode extent free list forward */
-        __le32 extfreeback;     /* 4: ag inode extent free list back    */
+        __le32 extfreeback;     /* 4: ag inode extent free list back    */
-        __le32 iagfree;         /* 4: iag free list                     */
+        __le32 iagfree;         /* 4: iag free list                     */
        /* summary map: 1 bit per inode extent */
        __le32 inosmap[SMAPSZ]; /* 16: sum map of mapwords w/ free inodes;
-                                 *      note: this indicates free and backed
+                                 *      note: this indicates free and backed
-                                 *      inodes, if the extent is not backed the
+                                 *      inodes, if the extent is not backed the
-                                 *      value will be 1.  if the extent is
+                                 *      value will be 1.  if the extent is
-                                 *      backed but all inodes are being used the
+                                 *      backed but all inodes are being used the
-                                 *      value will be 1.  if the extent is
+                                 *      value will be 1.  if the extent is
-                                 *      backed but at least one of the inodes is
+                                 *      backed but at least one of the inodes is
-                                 *      free the value will be 0.
+                                 *      free the value will be 0.
                                 */
        __le32 extsmap[SMAPSZ]; /* 16: sum map of mapwords w/ free extents */
-        __le32 nfreeinos;               /* 4: number of free inodes             */
+        __le32 nfreeinos;       /* 4: number of free inodes             */
-        __le32 nfreeexts;               /* 4: number of free extents            */
+        __le32 nfreeexts;       /* 4: number of free extents            */
        /* (72) */
        u8 pad[1976];           /* 1976: pad to 2048 bytes */
        /* allocation bit map: 1 bit per inode (0 - free, 1 - allocated) */
-        __le32 wmap[EXTSPERIAG];        /* 512: working allocation map  */
+        __le32 wmap[EXTSPERIAG];        /* 512: working allocation map */
        __le32 pmap[EXTSPERIAG];        /* 512: persistent allocation map */
        pxd_t inoext[EXTSPERIAG];       /* 1024: inode extent addresses */
 };                              /* (4096) */
@@ -93,44 +93,44 @@ struct iag {
 *      per AG control information (in inode map control page)
 */
 struct iagctl_disk {
-        __le32 inofree;         /* 4: free inode list anchor            */
+        __le32 inofree;         /* 4: free inode list anchor            */
-        __le32 extfree;         /* 4: free extent list anchor           */
+        __le32 extfree;         /* 4: free extent list anchor           */
-        __le32 numinos;         /* 4: number of backed inodes           */
+        __le32 numinos;         /* 4: number of backed inodes           */
-        __le32 numfree;         /* 4: number of free inodes             */
+        __le32 numfree;         /* 4: number of free inodes             */
 };                              /* (16) */
 struct iagctl {
-        int inofree;            /* free inode list anchor            */
+        int inofree;            /* free inode list anchor               */
-        int extfree;            /* free extent list anchor           */
+        int extfree;            /* free extent list anchor              */
-        int numinos;            /* number of backed inodes           */
+        int numinos;            /* number of backed inodes              */
-        int numfree;            /* number of free inodes             */
+        int numfree;            /* number of free inodes                */
 };
 /*
 *      per fileset/aggregate inode map control page
 */
 struct dinomap_disk {
-        __le32 in_freeiag;      /* 4: free iag list anchor     */
+        __le32 in_freeiag;      /* 4: free iag list anchor      */
-        __le32 in_nextiag;      /* 4: next free iag number     */
+        __le32 in_nextiag;      /* 4: next free iag number      */
-        __le32 in_numinos;      /* 4: num of backed inodes */
+        __le32 in_numinos;      /* 4: num of backed inodes      */
        __le32 in_numfree;      /* 4: num of free backed inodes */
        __le32 in_nbperiext;    /* 4: num of blocks per inode extent */
-        __le32 in_l2nbperiext;  /* 4: l2 of in_nbperiext */
+        __le32 in_l2nbperiext;  /* 4: l2 of in_nbperiext        */
-        __le32 in_diskblock;    /* 4: for standalone test driver  */
+        __le32 in_diskblock;    /* 4: for standalone test driver */
-        __le32 in_maxag;        /* 4: for standalone test driver  */
+        __le32 in_maxag;        /* 4: for standalone test driver */
-        u8 pad[2016];           /* 2016: pad to 2048 */
+        u8 pad[2016];           /* 2016: pad to 2048            */
        struct iagctl_disk in_agctl[MAXAG]; /* 2048: AG control information */
 };                              /* (4096) */
 struct dinomap {
-        int in_freeiag;         /* free iag list anchor     */
+        int in_freeiag;         /* free iag list anchor         */
-        int in_nextiag;         /* next free iag number     */
+        int in_nextiag;         /* next free iag number         */
-        int in_numinos;         /* num of backed inodes */
+        int in_numinos;         /* num of backed inodes         */
-        int in_numfree;         /* num of free backed inodes */
+        int in_numfree;         /* num of free backed inodes    */
        int in_nbperiext;       /* num of blocks per inode extent */
-        int in_l2nbperiext;     /* l2 of in_nbperiext */
+        int in_l2nbperiext;     /* l2 of in_nbperiext           */
-        int in_diskblock;       /* for standalone test driver  */
+        int in_diskblock;       /* for standalone test driver   */
-        int in_maxag;           /* for standalone test driver  */
+        int in_maxag;           /* for standalone test driver   */
        struct iagctl in_agctl[MAXAG];  /* AG control information */
 };
@@ -139,9 +139,9 @@ struct dinomap {
 */
 struct inomap {
        struct dinomap im_imap;         /* 4096: inode allocation control */
-        struct inode *im_ipimap;        /* 4: ptr to inode for imap   */
+        struct inode *im_ipimap;        /* 4: ptr to inode for imap     */
-        struct mutex im_freelock;       /* 4: iag free list lock      */
+        struct mutex im_freelock;       /* 4: iag free list lock        */
-        struct mutex im_aglock[MAXAG];  /* 512: per AG locks          */
+        struct mutex im_aglock[MAXAG];  /* 512: per AG locks            */
        u32 *im_DBGdimap;
        atomic_t im_numinos;    /* num of backed inodes */
        atomic_t im_numfree;    /* num of free backed inodes */
diff --git a/fs/jfs/jfs_incore.h b/fs/jfs/jfs_incore.h
index 8f453eff3c83..cb8f30985ad1 100644
--- a/fs/jfs/jfs_incore.h
+++ b/fs/jfs/jfs_incore.h
@@ -40,7 +40,7 @@ struct jfs_inode_info {
        uint    mode2;          /* jfs-specific mode            */
        uint    saved_uid;      /* saved for uid mount option */
        uint    saved_gid;      /* saved for gid mount option */
-        pxd_t   ixpxd;          /* inode extent descriptor      */
+        pxd_t   ixpxd;          /* inode extent descriptor      */
        dxd_t   acl;            /* dxd describing acl   */
        dxd_t   ea;             /* dxd describing ea    */
        time_t  otime;          /* time created */
@@ -190,7 +190,7 @@ struct jfs_sb_info {
        uint            gengen;         /* inode generation generator*/
        uint            inostamp;       /* shows inode belongs to fileset*/
-        /* Formerly in ipbmap */
+        /* Formerly in ipbmap */
        struct bmap     *bmap;          /* incore bmap descriptor       */
        struct nls_table *nls_tab;      /* current codepage             */
        struct inode *direct_inode;     /* metadata inode */
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index 44a2f33cb98d..de3e4a506dbc 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -244,7 +244,7 @@ int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
                goto writeRecord;
        /*
-         *      initialize/update page/transaction recovery lsn
+         *      initialize/update page/transaction recovery lsn
         */
        lsn = log->lsn;
@@ -263,7 +263,7 @@ int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
        }
        /*
-         *      initialize/update lsn of tblock of the page
+         *      initialize/update lsn of tblock of the page
         *
         * transaction inherits oldest lsn of pages associated
         * with allocation/deallocation of resources (their
@@ -307,7 +307,7 @@ int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
        LOGSYNC_UNLOCK(log, flags);
        /*
-         *      write the log record
+         *      write the log record
         */
      writeRecord:
        lsn = lmWriteRecord(log, tblk, lrd, tlck);
@@ -372,7 +372,7 @@ lmWriteRecord(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
                goto moveLrd;
        /*
-         *      move log record data
+         *      move log record data
         */
        /* retrieve source meta-data page to log */
        if (tlck->flag & tlckPAGELOCK) {
@@ -465,7 +465,7 @@ lmWriteRecord(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
        }
        /*
-         *      move log record descriptor
+         *      move log record descriptor
         */
      moveLrd:
        lrd->length = cpu_to_le16(len);
@@ -574,7 +574,7 @@ static int lmNextPage(struct jfs_log * log)
        LOGGC_LOCK(log);
        /*
-         *      write or queue the full page at the tail of write queue
+         *      write or queue the full page at the tail of write queue
         */
        /* get the tail tblk on commit queue */
        if (list_empty(&log->cqueue))
@@ -625,7 +625,7 @@ static int lmNextPage(struct jfs_log * log)
        LOGGC_UNLOCK(log);
        /*
-         *      allocate/initialize next page
+         *      allocate/initialize next page
         */
        /* if log wraps, the first data page of log is 2
         * (0 never used, 1 is superblock).
@@ -953,7 +953,7 @@ static int lmLogSync(struct jfs_log * log, int hard_sync)
                }
        /*
-         *      forward syncpt
+         *      forward syncpt
         */
        /* if last sync is same as last syncpt,
         * invoke sync point forward processing to update sync.
@@ -989,7 +989,7 @@ static int lmLogSync(struct jfs_log * log, int hard_sync)
                lsn = log->lsn;
        /*
-         *      setup next syncpt trigger (SWAG)
+         *      setup next syncpt trigger (SWAG)
         */
        logsize = log->logsize;
@@ -1000,11 +1000,11 @@ static int lmLogSync(struct jfs_log * log, int hard_sync)
        if (more < 2 * LOGPSIZE) {
                jfs_warn("\n ... Log Wrap ... Log Wrap ... Log Wrap ...\n");
                /*
-                 *      log wrapping
+                 *      log wrapping
                 *
                 * option 1 - panic ? No.!
                 * option 2 - shutdown file systems
-                 *            associated with log ?
+                 *            associated with log ?
                 * option 3 - extend log ?
                 */
                /*
@@ -1062,7 +1062,7 @@ void jfs_syncpt(struct jfs_log *log, int hard_sync)
 /*
 * NAME:        lmLogOpen()
 *
- * FUNCTION:    open the log on first open;
+ * FUNCTION:    open the log on first open;
 *      insert filesystem in the active list of the log.
 *
 * PARAMETER:   ipmnt   - file system mount inode
@@ -1113,7 +1113,7 @@ int lmLogOpen(struct super_block *sb)
        init_waitqueue_head(&log->syncwait);
        /*
-         *      external log as separate logical volume
+         *      external log as separate logical volume
         *
         * file systems to log may have n-to-1 relationship;
         */
@@ -1155,7 +1155,7 @@ journal_found:
        return 0;
        /*
-         *      unwind on error
+         *      unwind on error
         */
      shutdown:         /* unwind lbmLogInit() */
        list_del(&log->journal_list);
@@ -1427,7 +1427,7 @@ int lmLogInit(struct jfs_log * log)
        return 0;
        /*
-         *      unwind on error
+         *      unwind on error
         */
      errout30:         /* release log page */
        log->wqueue = NULL;
@@ -1480,7 +1480,7 @@ int lmLogClose(struct super_block *sb)
        if (test_bit(log_INLINELOG, &log->flag)) {
                /*
-                 *      in-line log in host file system
+                 *      in-line log in host file system
                 */
                rc = lmLogShutdown(log);
                kfree(log);
@@ -1504,7 +1504,7 @@ int lmLogClose(struct super_block *sb)
                goto out;
        /*
-         *      external log as separate logical volume
+         *      external log as separate logical volume
         */
        list_del(&log->journal_list);
        bdev = log->bdev;
@@ -1622,20 +1622,26 @@ void jfs_flush_journal(struct jfs_log *log, int wait)
        if (!list_empty(&log->synclist)) {
                struct logsyncblk *lp;
+                printk(KERN_ERR "jfs_flush_journal: synclist not empty\n");
                list_for_each_entry(lp, &log->synclist, synclist) {
                        if (lp->xflag & COMMIT_PAGE) {
                                struct metapage *mp = (struct metapage *)lp;
-                                dump_mem("orphan metapage", lp,
+                                print_hex_dump(KERN_ERR, "metapage: ",
-                                         sizeof(struct metapage));
+                                               DUMP_PREFIX_ADDRESS, 16, 4,
-                                dump_mem("page", mp->page, sizeof(struct page));
+                                               mp, sizeof(struct metapage), 0);
-                        }
+                                print_hex_dump(KERN_ERR, "page: ",
-                        else
+                                               DUMP_PREFIX_ADDRESS, 16,
-                                dump_mem("orphan tblock", lp,
+                                               sizeof(long), mp->page,
-                                         sizeof(struct tblock));
+                                               sizeof(struct page), 0);
+                        } else
+                                print_hex_dump(KERN_ERR, "tblock:",
+                                               DUMP_PREFIX_ADDRESS, 16, 4,
+                                               lp, sizeof(struct tblock), 0);
                }
        }
+#else
+        WARN_ON(!list_empty(&log->synclist));
 #endif
-        //assert(list_empty(&log->synclist));
        clear_bit(log_FLUSH, &log->flag);
 }
@@ -1723,7 +1729,7 @@ int lmLogShutdown(struct jfs_log * log)
 *
 * PARAMETER:   log     - pointer to logs inode.
 *              fsdev   - kdev_t of filesystem.
- *              serial  - pointer to returned log serial number
+ *              serial  - pointer to returned log serial number
 *              activate - insert/remove device from active list.
 *
 * RETURN:      0       - success
@@ -1963,7 +1969,7 @@ static void lbmfree(struct lbuf * bp)
 * FUNCTION:    add a log buffer to the log redrive list
 *
 * PARAMETER:
- *     bp       - log buffer
+ *      bp      - log buffer
 *
 * NOTES:
 *      Takes log_redrive_lock.
@@ -2054,7 +2060,7 @@ static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag,
        bp->l_flag = flag;
        /*
-         *      insert bp at tail of write queue associated with log
+         *      insert bp at tail of write queue associated with log
         *
         * (request is either for bp already/currently at head of queue
         * or new bp to be inserted at tail)
@@ -2117,7 +2123,7 @@ static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag)
            log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize));
        /*
-         *      initiate pageout of the page
+         *      initiate pageout of the page
         */
        lbmStartIO(bp);
 }
@@ -2128,7 +2134,7 @@ static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag)
 *
 * FUNCTION:    Interface to DD strategy routine
 *
- * RETURN:      none
+ * RETURN:      none
 *
 * serialization: LCACHE_LOCK() is NOT held during log i/o;
 */
@@ -2222,7 +2228,7 @@ static int lbmIODone(struct bio *bio, unsigned int bytes_done, int error)
        bio_put(bio);
        /*
-         *      pagein completion
+         *      pagein completion
         */
        if (bp->l_flag & lbmREAD) {
                bp->l_flag &= ~lbmREAD;
@@ -2236,7 +2242,7 @@ static int lbmIODone(struct bio *bio, unsigned int bytes_done, int error)
        }
        /*
-         *      pageout completion
+         *      pageout completion
         *
         * the bp at the head of write queue has completed pageout.
         *
@@ -2302,7 +2308,7 @@ static int lbmIODone(struct bio *bio, unsigned int bytes_done, int error)
        }
        /*
-         *      synchronous pageout:
+         *      synchronous pageout:
         *
         * buffer has not necessarily been removed from write queue
         * (e.g., synchronous write of partial-page with COMMIT):
@@ -2316,7 +2322,7 @@ static int lbmIODone(struct bio *bio, unsigned int bytes_done, int error)
        }
        /*
-         *      Group Commit pageout:
+         *      Group Commit pageout:
         */
        else if (bp->l_flag & lbmGC) {
                LCACHE_UNLOCK(flags);
@@ -2324,7 +2330,7 @@ static int lbmIODone(struct bio *bio, unsigned int bytes_done, int error)
        }
        /*
-         *      asynchronous pageout:
+         *      asynchronous pageout:
         *
         * buffer must have been removed from write queue:
         * insert buffer at head of freelist where it can be recycled
@@ -2375,7 +2381,7 @@ int jfsIOWait(void *arg)
 * FUNCTION:    format file system log
 *
 * PARAMETERS:
- *      log     - volume log
+ *      log     - volume log
 *      logAddress - start address of log space in FS block
 *      logSize - length of log space in FS block;
 *
@@ -2407,16 +2413,16 @@ int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize)
        npages = logSize >> sbi->l2nbperpage;
        /*
-         *      log space:
+         *      log space:
         *
         * page 0 - reserved;
         * page 1 - log superblock;
         * page 2 - log data page: A SYNC log record is written
-         *          into this page at logform time;
+         *          into this page at logform time;
         * pages 3-N - log data page: set to empty log data pages;
         */
        /*
-         *      init log superblock: log page 1
+         *      init log superblock: log page 1
         */
        logsuper = (struct logsuper *) bp->l_ldata;
@@ -2436,7 +2442,7 @@ int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize)
                goto exit;
        /*
-         *      init pages 2 to npages-1 as log data pages:
+         *      init pages 2 to npages-1 as log data pages:
         *
         * log page sequence number (lpsn) initialization:
         *
@@ -2479,7 +2485,7 @@ int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize)
                goto exit;
        /*
-         *      initialize succeeding log pages: lpsn = 0, 1, ..., (N-2)
+         *      initialize succeeding log pages: lpsn = 0, 1, ..., (N-2)
         */
        for (lspn = 0; lspn < npages - 3; lspn++) {
                lp->h.page = lp->t.page = cpu_to_le32(lspn);
@@ -2495,7 +2501,7 @@ int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize)
        rc = 0;
 exit:
        /*
-         *      finalize log
+         *      finalize log
         */
        /* release the buffer */
        lbmFree(bp);
diff --git a/fs/jfs/jfs_logmgr.h b/fs/jfs/jfs_logmgr.h
index a53fb17ea219..1f85ef0ec045 100644
--- a/fs/jfs/jfs_logmgr.h
+++ b/fs/jfs/jfs_logmgr.h
@@ -144,7 +144,7 @@ struct logpage {
 *
 * (this comment should be rewritten !)
 * jfs uses only "after" log records (only a single writer is allowed
- * in a  page, pages are written to temporary paging space if
+ * in a page, pages are written to temporary paging space
 * if they must be written to disk before commit, and i/o is
 * scheduled for modified pages to their home location after
 * the log records containing the after values and the commit
@@ -153,7 +153,7 @@ struct logpage {
 *
 * a log record consists of a data area of variable length followed by
 * a descriptor of fixed size LOGRDSIZE bytes.
- * the  data area is rounded up to an integral number of 4-bytes and
+ * the data area is rounded up to an integral number of 4-bytes and
 * must be no longer than LOGPSIZE.
 * the descriptor is of size of multiple of 4-bytes and aligned on a
 * 4-byte boundary.
@@ -215,13 +215,13 @@ struct lrd {
        union {
                /*
-                 *      COMMIT: commit
+                 *      COMMIT: commit
                 *
                 * transaction commit: no type-dependent information;
                 */
                /*
-                 *      REDOPAGE: after-image
+                 *      REDOPAGE: after-image
                 *
                 * apply after-image;
                 *
@@ -236,7 +236,7 @@ struct lrd {
                } redopage;     /* (20) */
                /*
-                 *      NOREDOPAGE: the page is freed
+                 *      NOREDOPAGE: the page is freed
                 *
                 * do not apply after-image records which precede this record
                 * in the log with the same page block number to this page.
@@ -252,7 +252,7 @@ struct lrd {
                } noredopage;   /* (20) */
                /*
-                 *      UPDATEMAP: update block allocation map
+                 *      UPDATEMAP: update block allocation map
                 *
                 * either in-line PXD,
                 * or     out-of-line  XADLIST;
@@ -268,7 +268,7 @@ struct lrd {
                } updatemap;    /* (20) */
                /*
-                 *      NOREDOINOEXT: the inode extent is freed
+                 *      NOREDOINOEXT: the inode extent is freed
                 *
                 * do not apply after-image records which precede this
                 * record in the log with the any of the 4 page block
@@ -286,7 +286,7 @@ struct lrd {
                } noredoinoext; /* (20) */
                /*
-                 *      SYNCPT: log sync point
+                 *      SYNCPT: log sync point
                 *
                 * replay log upto syncpt address specified;
                 */
@@ -295,13 +295,13 @@ struct lrd {
                } syncpt;
                /*
-                 *      MOUNT: file system mount
+                 *      MOUNT: file system mount
                 *
                 * file system mount: no type-dependent information;
                 */
                /*
-                 *      ? FREEXTENT: free specified extent(s)
+                 *      ? FREEXTENT: free specified extent(s)
                 *
                 * free specified extent(s) from block allocation map
                 * N.B.: nextents should be length of data/sizeof(xad_t)
@@ -314,7 +314,7 @@ struct lrd {
                } freextent;
                /*
-                 *      ? NOREDOFILE: this file is freed
+                 *      ? NOREDOFILE: this file is freed
                 *
                 * do not apply records which precede this record in the log
                 * with the same inode number.
@@ -330,7 +330,7 @@ struct lrd {
                } noredofile;
                /*
-                 *      ? NEWPAGE:
+                 *      ? NEWPAGE:
                 *
                 * metadata type dependent
                 */
@@ -342,7 +342,7 @@ struct lrd {
                } newpage;
                /*
-                 *      ? DUMMY: filler
+                 *      ? DUMMY: filler
                 *
                 * no type-dependent information
                 */
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index 43d4f69afbec..77c7f1129dde 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -472,7 +472,8 @@ add_failed:
        printk(KERN_ERR "JFS: bio_add_page failed unexpectedly\n");
        goto skip;
 dump_bio:
-        dump_mem("bio", bio, sizeof(*bio));
+        print_hex_dump(KERN_ERR, "JFS: dump of bio: ", DUMP_PREFIX_ADDRESS, 16,
+                       4, bio, sizeof(*bio), 0);
 skip:
        bio_put(bio);
        unlock_page(page);
diff --git a/fs/jfs/jfs_mount.c b/fs/jfs/jfs_mount.c
index 4dd479834897..644429acb8c0 100644
--- a/fs/jfs/jfs_mount.c
+++ b/fs/jfs/jfs_mount.c
@@ -80,7 +80,7 @@ static int logMOUNT(struct super_block *sb);
 */
 int jfs_mount(struct super_block *sb)
 {
-        int rc = 0;             /* Return code          */
+        int rc = 0;             /* Return code */
        struct jfs_sb_info *sbi = JFS_SBI(sb);
        struct inode *ipaimap = NULL;
        struct inode *ipaimap2 = NULL;
@@ -169,7 +169,7 @@ int jfs_mount(struct super_block *sb)
                sbi->ipaimap2 = NULL;
        /*
-         *      mount (the only/single) fileset
+         *      mount (the only/single) fileset
         */
        /*
         * open fileset inode allocation map (aka fileset inode)
@@ -195,7 +195,7 @@ int jfs_mount(struct super_block *sb)
        goto out;
        /*
-         *      unwind on error
+         *      unwind on error
         */
      errout41:         /* close fileset inode allocation map inode */
        diFreeSpecial(ipimap);
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index 25430d0b0d59..7aa1f7004eaf 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -18,7 +18,7 @@
 */
 /*
- *      jfs_txnmgr.c: transaction manager
+ *      jfs_txnmgr.c: transaction manager
 *
 * notes:
 * transaction starts with txBegin() and ends with txCommit()
@@ -60,7 +60,7 @@
 #include "jfs_debug.h"
 /*
- *      transaction management structures
+ *      transaction management structures
 */
 static struct {
        int freetid;            /* index of a free tid structure */
@@ -103,19 +103,19 @@ module_param(nTxLock, int, 0);
 MODULE_PARM_DESC(nTxLock,
                 "Number of transaction locks (max:65536)");
-struct tblock *TxBlock;         /* transaction block table */
+struct tblock *TxBlock; /* transaction block table */
-static int TxLockLWM;           /* Low water mark for number of txLocks used */
+static int TxLockLWM;   /* Low water mark for number of txLocks used */
-static int TxLockHWM;           /* High water mark for number of txLocks used */
+static int TxLockHWM;   /* High water mark for number of txLocks used */
-static int TxLockVHWM;          /* Very High water mark */
+static int TxLockVHWM;  /* Very High water mark */
-struct tlock *TxLock;           /* transaction lock table */
+struct tlock *TxLock;   /* transaction lock table */
 /*
- *      transaction management lock
+ *      transaction management lock
 */
 static DEFINE_SPINLOCK(jfsTxnLock);
-#define TXN_LOCK()              spin_lock(&jfsTxnLock)
+#define TXN_LOCK()              spin_lock(&jfsTxnLock)
-#define TXN_UNLOCK()            spin_unlock(&jfsTxnLock)
+#define TXN_UNLOCK()            spin_unlock(&jfsTxnLock)
 #define LAZY_LOCK_INIT()        spin_lock_init(&TxAnchor.LazyLock);
 #define LAZY_LOCK(flags)        spin_lock_irqsave(&TxAnchor.LazyLock, flags)
@@ -148,7 +148,7 @@ static inline void TXN_SLEEP_DROP_LOCK(wait_queue_head_t * event)
 #define TXN_WAKEUP(event) wake_up_all(event)
 /*
- *      statistics
+ *      statistics
 */
 static struct {
        tid_t maxtid;           /* 4: biggest tid ever used */
@@ -181,8 +181,8 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 static void LogSyncRelease(struct metapage * mp);
 /*
- *              transaction block/lock management
+ *              transaction block/lock management
- *              ---------------------------------
+ *              ---------------------------------
 */
 /*
@@ -227,9 +227,9 @@ static void txLockFree(lid_t lid)
 }
 /*
- * NAME:        txInit()
+ * NAME:        txInit()
 *
- * FUNCTION:    initialize transaction management structures
+ * FUNCTION:    initialize transaction management structures
 *
 * RETURN:
 *
@@ -333,9 +333,9 @@ int txInit(void)
 }
 /*
- * NAME:        txExit()
+ * NAME:        txExit()
 *
- * FUNCTION:    clean up when module is unloaded
+ * FUNCTION:    clean up when module is unloaded
 */
 void txExit(void)
 {
@@ -346,12 +346,12 @@ void txExit(void)
 }
 /*
- * NAME:        txBegin()
+ * NAME:        txBegin()
 *
- * FUNCTION:    start a transaction.
+ * FUNCTION:    start a transaction.
 *
- * PARAMETER:   sb      - superblock
+ * PARAMETER:   sb      - superblock
- *              flag    - force for nested tx;
+ *              flag    - force for nested tx;
 *
 * RETURN:      tid     - transaction id
 *
@@ -447,13 +447,13 @@ tid_t txBegin(struct super_block *sb, int flag)
 }
 /*
- * NAME:        txBeginAnon()
+ * NAME:        txBeginAnon()
 *
- * FUNCTION:    start an anonymous transaction.
+ * FUNCTION:    start an anonymous transaction.
 *              Blocks if logsync or available tlocks are low to prevent
 *              anonymous tlocks from depleting supply.
 *
- * PARAMETER:   sb      - superblock
+ * PARAMETER:   sb      - superblock
 *
 * RETURN:      none
 */
@@ -489,11 +489,11 @@ void txBeginAnon(struct super_block *sb)
 }
 /*
- *      txEnd()
+ *      txEnd()
 *
 * function: free specified transaction block.
 *
- *      logsync barrier processing:
+ *      logsync barrier processing:
 *
 * serialization:
 */
@@ -577,13 +577,13 @@ wakeup:
 }
 /*
- *      txLock()
+ *      txLock()
 *
 * function: acquire a transaction lock on the specified <mp>
 *
 * parameter:
 *
- * return:      transaction lock id
+ * return:      transaction lock id
 *
 * serialization:
 */
@@ -829,12 +829,16 @@ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp,
        /* Only locks on ipimap or ipaimap should reach here */
        /* assert(jfs_ip->fileset == AGGREGATE_I); */
        if (jfs_ip->fileset != AGGREGATE_I) {
-                jfs_err("txLock: trying to lock locked page!");
+                printk(KERN_ERR "txLock: trying to lock locked page!");
-                dump_mem("ip", ip, sizeof(struct inode));
+                print_hex_dump(KERN_ERR, "ip: ", DUMP_PREFIX_ADDRESS, 16, 4,
-                dump_mem("mp", mp, sizeof(struct metapage));
+                               ip, sizeof(*ip), 0);
-                dump_mem("Locker's tblk", tid_to_tblock(tid),
+                print_hex_dump(KERN_ERR, "mp: ", DUMP_PREFIX_ADDRESS, 16, 4,
-                         sizeof(struct tblock));
+                               mp, sizeof(*mp), 0);
-                dump_mem("Tlock", tlck, sizeof(struct tlock));
+                print_hex_dump(KERN_ERR, "Locker's tblock: ",
+                               DUMP_PREFIX_ADDRESS, 16, 4, tid_to_tblock(tid),
+                               sizeof(struct tblock), 0);
+                print_hex_dump(KERN_ERR, "Tlock: ", DUMP_PREFIX_ADDRESS, 16, 4,
+                               tlck, sizeof(*tlck), 0);
                BUG();
        }
        INCREMENT(stattx.waitlock);     /* statistics */
@@ -857,17 +861,17 @@ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp,
 }
 /*
- * NAME:        txRelease()
+ * NAME:        txRelease()
 *
- * FUNCTION:    Release buffers associated with transaction locks, but don't
+ * FUNCTION:    Release buffers associated with transaction locks, but don't
 *              mark homeok yet.  The allows other transactions to modify
 *              buffers, but won't let them go to disk until commit record
 *              actually gets written.
 *
 * PARAMETER:
- *              tblk    -
+ *              tblk    -
 *
- * RETURN:      Errors from subroutines.
+ * RETURN:      Errors from subroutines.
 */
 static void txRelease(struct tblock * tblk)
 {
@@ -896,10 +900,10 @@ static void txRelease(struct tblock * tblk)
 }
 /*
- * NAME:        txUnlock()
+ * NAME:        txUnlock()
 *
- * FUNCTION:    Initiates pageout of pages modified by tid in journalled
+ * FUNCTION:    Initiates pageout of pages modified by tid in journalled
- *              objects and frees their lockwords.
+ *              objects and frees their lockwords.
 */
 static void txUnlock(struct tblock * tblk)
 {
@@ -983,10 +987,10 @@ static void txUnlock(struct tblock * tblk)
 }
 /*
- *      txMaplock()
+ *      txMaplock()
 *
 * function: allocate a transaction lock for freed page/entry;
- *      for freed page, maplock is used as xtlock/dtlock type;
+ *      for freed page, maplock is used as xtlock/dtlock type;
 */
 struct tlock *txMaplock(tid_t tid, struct inode *ip, int type)
 {
@@ -1057,7 +1061,7 @@ struct tlock *txMaplock(tid_t tid, struct inode *ip, int type)
 }
 /*
- *      txLinelock()
+ *      txLinelock()
 *
 * function: allocate a transaction lock for log vector list
 */
@@ -1092,39 +1096,39 @@ struct linelock *txLinelock(struct linelock * tlock)
 }
 /*
- *              transaction commit management
+ *              transaction commit management
- *              -----------------------------
+ *              -----------------------------
 */
 /*
- * NAME:        txCommit()
+ * NAME:        txCommit()
- *
+ *
- * FUNCTION:    commit the changes to the objects specified in
+ * FUNCTION:    commit the changes to the objects specified in
- *              clist.  For journalled segments only the
+ *              clist.  For journalled segments only the
- *              changes of the caller are committed, ie by tid.
+ *              changes of the caller are committed, ie by tid.
- *              for non-journalled segments the data are flushed to
+ *              for non-journalled segments the data are flushed to
- *              disk and then the change to the disk inode and indirect
+ *              disk and then the change to the disk inode and indirect
- *              blocks committed (so blocks newly allocated to the
+ *              blocks committed (so blocks newly allocated to the
- *              segment will be made a part of the segment atomically).
+ *              segment will be made a part of the segment atomically).
- *
+ *
- *              all of the segments specified in clist must be in
+ *              all of the segments specified in clist must be in
- *              one file system. no more than 6 segments are needed
+ *              one file system. no more than 6 segments are needed
- *              to handle all unix svcs.
+ *              to handle all unix svcs.
- *
+ *
- *              if the i_nlink field (i.e. disk inode link count)
+ *              if the i_nlink field (i.e. disk inode link count)
- *              is zero, and the type of inode is a regular file or
+ *              is zero, and the type of inode is a regular file or
- *              directory, or symbolic link , the inode is truncated
+ *              directory, or symbolic link, the inode is truncated
- *              to zero length. the truncation is committed but the
+ *              to zero length. the truncation is committed but the
- *              VM resources are unaffected until it is closed (see
+ *              VM resources are unaffected until it is closed (see
- *              iput and iclose).
+ *              iput and iclose).
 *
 * PARAMETER:
 *
 * RETURN:
 *
 * serialization:
- *              on entry the inode lock on each segment is assumed
+ *              on entry the inode lock on each segment is assumed
- *              to be held.
+ *              to be held.
 *
 * i/o error:
 */
@@ -1175,7 +1179,7 @@ int txCommit(tid_t tid,		/* transaction identifier */
        if ((flag & (COMMIT_FORCE | COMMIT_SYNC)) == 0)
                tblk->xflag |= COMMIT_LAZY;
        /*
-         *      prepare non-journaled objects for commit
+         *      prepare non-journaled objects for commit
         *
         * flush data pages of non-journaled file
         * to prevent the file getting non-initialized disk blocks
@@ -1186,7 +1190,7 @@ int txCommit(tid_t tid,		/* transaction identifier */
        cd.nip = nip;
        /*
-         *      acquire transaction lock on (on-disk) inodes
+         *      acquire transaction lock on (on-disk) inodes
         *
         * update on-disk inode from in-memory inode
         * acquiring transaction locks for AFTER records
@@ -1262,7 +1266,7 @@ int txCommit(tid_t tid,		/* transaction identifier */
        }
        /*
-         *      write log records from transaction locks
+         *      write log records from transaction locks
         *
         * txUpdateMap() resets XAD_NEW in XAD.
         */
@@ -1294,7 +1298,7 @@ int txCommit(tid_t tid,		/* transaction identifier */
                !test_cflag(COMMIT_Nolink, tblk->u.ip)));
        /*
-         *      write COMMIT log record
+         *      write COMMIT log record
         */
        lrd->type = cpu_to_le16(LOG_COMMIT);
        lrd->length = 0;
@@ -1303,7 +1307,7 @@ int txCommit(tid_t tid,		/* transaction identifier */
        lmGroupCommit(log, tblk);
        /*
-         *      - transaction is now committed -
+         *      - transaction is now committed -
         */
        /*
@@ -1314,11 +1318,11 @@ int txCommit(tid_t tid,		/* transaction identifier */
                txForce(tblk);
        /*
-         *      update allocation map.
+         *      update allocation map.
         *
         * update inode allocation map and inode:
         * free pager lock on memory object of inode if any.
-         * update  block allocation map.
+         * update block allocation map.
         *
         * txUpdateMap() resets XAD_NEW in XAD.
         */
@@ -1326,7 +1330,7 @@ int txCommit(tid_t tid,		/* transaction identifier */
                txUpdateMap(tblk);
        /*
-         *      free transaction locks and pageout/free pages
+         *      free transaction locks and pageout/free pages
         */
        txRelease(tblk);
@@ -1335,7 +1339,7 @@ int txCommit(tid_t tid,		/* transaction identifier */
        /*
-         *      reset in-memory object state
+         *      reset in-memory object state
         */
        for (k = 0; k < cd.nip; k++) {
                ip = cd.iplist[k];
@@ -1358,11 +1362,11 @@ int txCommit(tid_t tid,		/* transaction identifier */
 }
 /*
- * NAME:        txLog()
+ * NAME:        txLog()
 *
- * FUNCTION:    Writes AFTER log records for all lines modified
+ * FUNCTION:    Writes AFTER log records for all lines modified
- *              by tid for segments specified by inodes in comdata.
+ *              by tid for segments specified by inodes in comdata.
- *              Code assumes only WRITELOCKS are recorded in lockwords.
+ *              Code assumes only WRITELOCKS are recorded in lockwords.
 *
 * PARAMETERS:
 *
@@ -1421,12 +1425,12 @@ static int txLog(struct jfs_log * log, struct tblock * tblk, struct commit * cd)
 }
 /*
- *      diLog()
+ *      diLog()
 *
- * function:    log inode tlock and format maplock to update bmap;
+ * function:    log inode tlock and format maplock to update bmap;
 */
 static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
-          struct tlock * tlck, struct commit * cd)
+                 struct tlock * tlck, struct commit * cd)
 {
        int rc = 0;
        struct metapage *mp;
@@ -1442,7 +1446,7 @@ static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
        pxd = &lrd->log.redopage.pxd;
        /*
-         *      inode after image
+         *      inode after image
         */
        if (tlck->type & tlckENTRY) {
                /* log after-image for logredo(): */
@@ -1456,7 +1460,7 @@ static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
                tlck->flag |= tlckWRITEPAGE;
        } else if (tlck->type & tlckFREE) {
                /*
-                 *      free inode extent
+                 *      free inode extent
                 *
                 * (pages of the freed inode extent have been invalidated and
                 * a maplock for free of the extent has been formatted at
@@ -1498,7 +1502,7 @@ static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
                jfs_err("diLog: UFO type tlck:0x%p", tlck);
 #ifdef  _JFS_WIP
        /*
-         *      alloc/free external EA extent
+         *      alloc/free external EA extent
         *
         * a maplock for txUpdateMap() to update bPWMAP for alloc/free
         * of the extent has been formatted at txLock() time;
@@ -1534,9 +1538,9 @@ static int diLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 }
 /*
- *      dataLog()
+ *      dataLog()
 *
- * function:    log data tlock
+ * function:    log data tlock
 */
 static int dataLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
            struct tlock * tlck)
@@ -1580,9 +1584,9 @@ static int dataLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 }
 /*
- *      dtLog()
+ *      dtLog()
 *
- * function:    log dtree tlock and format maplock to update bmap;
+ * function:    log dtree tlock and format maplock to update bmap;
 */
 static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
           struct tlock * tlck)
@@ -1603,10 +1607,10 @@ static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
                lrd->log.redopage.type |= cpu_to_le16(LOG_BTROOT);
        /*
-         *      page extension via relocation: entry insertion;
+         *      page extension via relocation: entry insertion;
-         *      page extension in-place: entry insertion;
+         *      page extension in-place: entry insertion;
-         *      new right page from page split, reinitialized in-line
+         *      new right page from page split, reinitialized in-line
-         *      root from root page split: entry insertion;
+         *      root from root page split: entry insertion;
         */
        if (tlck->type & (tlckNEW | tlckEXTEND)) {
                /* log after-image of the new page for logredo():
@@ -1641,8 +1645,8 @@ static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
        }
        /*
-         *      entry insertion/deletion,
+         *      entry insertion/deletion,
-         *      sibling page link update (old right page before split);
+         *      sibling page link update (old right page before split);
         */
        if (tlck->type & (tlckENTRY | tlckRELINK)) {
                /* log after-image for logredo(): */
@@ -1658,11 +1662,11 @@ static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
        }
        /*
-         *      page deletion: page has been invalidated
+         *      page deletion: page has been invalidated
-         *      page relocation: source extent
+         *      page relocation: source extent
         *
-         *      a maplock for free of the page has been formatted
+         *      a maplock for free of the page has been formatted
-         *      at txLock() time);
+         *      at txLock() time);
         */
        if (tlck->type & (tlckFREE | tlckRELOCATE)) {
                /* log LOG_NOREDOPAGE of the deleted page for logredo()
@@ -1683,9 +1687,9 @@ static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 }
 /*
- *      xtLog()
+ *      xtLog()
 *
- * function:    log xtree tlock and format maplock to update bmap;
+ * function:    log xtree tlock and format maplock to update bmap;
 */
 static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
           struct tlock * tlck)
@@ -1725,8 +1729,8 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
        xadlock = (struct xdlistlock *) maplock;
        /*
-         *      entry insertion/extension;
+         *      entry insertion/extension;
-         *      sibling page link update (old right page before split);
+         *      sibling page link update (old right page before split);
         */
        if (tlck->type & (tlckNEW | tlckGROW | tlckRELINK)) {
                /* log after-image for logredo():
@@ -1801,7 +1805,7 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
        }
        /*
-         *      page deletion: file deletion/truncation (ref. xtTruncate())
+         *      page deletion: file deletion/truncation (ref. xtTruncate())
         *
         * (page will be invalidated after log is written and bmap
         * is updated from the page);
@@ -1908,13 +1912,13 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
        }
        /*
-         *      page/entry truncation: file truncation (ref. xtTruncate())
+         *      page/entry truncation: file truncation (ref. xtTruncate())
         *
-         *     |----------+------+------+---------------|
+         *      |----------+------+------+---------------|
-         *                |      |      |
+         *                 |      |      |
-         *                |      |     hwm - hwm before truncation
+         *                 |      |     hwm - hwm before truncation
-         *                |     next - truncation point
+         *                 |     next - truncation point
-         *               lwm - lwm before truncation
+         *                lwm - lwm before truncation
         * header ?
         */
        if (tlck->type & tlckTRUNCATE) {
@@ -1937,7 +1941,7 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
                twm = xtlck->twm.offset;
                /*
-                 *      write log records
+                 *      write log records
                 */
                /* log after-image for logredo():
                 *
@@ -1997,7 +2001,7 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
                }
                /*
-                 *      format maplock(s) for txUpdateMap() to update bmap
+                 *      format maplock(s) for txUpdateMap() to update bmap
                 */
                maplock->index = 0;
@@ -2069,9 +2073,9 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 }
 /*
- *      mapLog()
+ *      mapLog()
 *
- * function:    log from maplock of freed data extents;
+ * function:    log from maplock of freed data extents;
 */
 static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
                   struct tlock * tlck)
@@ -2081,7 +2085,7 @@ static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
        pxd_t *pxd;
        /*
-         *      page relocation: free the source page extent
+         *      page relocation: free the source page extent
         *
         * a maplock for txUpdateMap() for free of the page
         * has been formatted at txLock() time saving the src
@@ -2155,10 +2159,10 @@ static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 }
 /*
- *      txEA()
+ *      txEA()
 *
- * function:    acquire maplock for EA/ACL extents or
+ * function:    acquire maplock for EA/ACL extents or
- *              set COMMIT_INLINE flag;
+ *              set COMMIT_INLINE flag;
 */
 void txEA(tid_t tid, struct inode *ip, dxd_t * oldea, dxd_t * newea)
 {
@@ -2207,10 +2211,10 @@ void txEA(tid_t tid, struct inode *ip, dxd_t * oldea, dxd_t * newea)
 }
 /*
- *      txForce()
+ *      txForce()
 *
 * function: synchronously write pages locked by transaction
- *              after txLog() but before txUpdateMap();
+ *           after txLog() but before txUpdateMap();
 */
 static void txForce(struct tblock * tblk)
 {
@@ -2273,10 +2277,10 @@ static void txForce(struct tblock * tblk)
 }
 /*
- *      txUpdateMap()
+ *      txUpdateMap()
 *
- * function:    update persistent allocation map (and working map
+ * function:    update persistent allocation map (and working map
- *              if appropriate);
+ *              if appropriate);
 *
 * parameter:
 */
@@ -2298,7 +2302,7 @@ static void txUpdateMap(struct tblock * tblk)
        /*
-         *      update block allocation map
+         *      update block allocation map
         *
         * update allocation state in pmap (and wmap) and
         * update lsn of the pmap page;
@@ -2382,7 +2386,7 @@ static void txUpdateMap(struct tblock * tblk)
                }
        }
        /*
-         *      update inode allocation map
+         *      update inode allocation map
         *
         * update allocation state in pmap and
         * update lsn of the pmap page;
@@ -2407,24 +2411,24 @@ static void txUpdateMap(struct tblock * tblk)
 }
 /*
- *      txAllocPMap()
+ *      txAllocPMap()
 *
 * function: allocate from persistent map;
 *
 * parameter:
- *      ipbmap  -
+ *      ipbmap  -
- *      malock -
+ *      malock  -
- *              xad list:
+ *              xad list:
- *              pxd:
+ *              pxd:
- *
+ *
- *      maptype -
+ *      maptype -
- *              allocate from persistent map;
+ *              allocate from persistent map;
- *              free from persistent map;
+ *              free from persistent map;
- *              (e.g., tmp file - free from working map at releae
+ *              (e.g., tmp file - free from working map at release
- *               of last reference);
+ *               of last reference);
- *              free from persistent and working map;
+ *              free from persistent and working map;
- *
+ *
- *      lsn     - log sequence number;
+ *      lsn     - log sequence number;
 */
 static void txAllocPMap(struct inode *ip, struct maplock * maplock,
                        struct tblock * tblk)
@@ -2478,9 +2482,9 @@ static void txAllocPMap(struct inode *ip, struct maplock * maplock,
 }
 /*
- *      txFreeMap()
+ *      txFreeMap()
 *
- * function:    free from persistent and/or working map;
+ * function:    free from persistent and/or working map;
 *
 * todo: optimization
 */
@@ -2579,9 +2583,9 @@ void txFreeMap(struct inode *ip,
 }
 /*
- *      txFreelock()
+ *      txFreelock()
 *
- * function:    remove tlock from inode anonymous locklist
+ * function:    remove tlock from inode anonymous locklist
 */
 void txFreelock(struct inode *ip)
 {
@@ -2619,7 +2623,7 @@ void txFreelock(struct inode *ip)
 }
 /*
- *      txAbort()
+ *      txAbort()
 *
 * function: abort tx before commit;
 *
@@ -2679,7 +2683,7 @@ void txAbort(tid_t tid, int dirty)
 }
 /*
- *      txLazyCommit(void)
+ *      txLazyCommit(void)
 *
 *      All transactions except those changing ipimap (COMMIT_FORCE) are
 *      processed by this routine.  This insures that the inode and block
@@ -2728,7 +2732,7 @@ static void txLazyCommit(struct tblock * tblk)
 }
 /*
- *      jfs_lazycommit(void)
+ *      jfs_lazycommit(void)
 *
 *      To be run as a kernel daemon.  If lbmIODone is called in an interrupt
 *      context, or where blocking is not wanted, this routine will process
@@ -2913,7 +2917,7 @@ void txResume(struct super_block *sb)
 }
 /*
- *      jfs_sync(void)
+ *      jfs_sync(void)
 *
 *      To be run as a kernel daemon.  This is awakened when tlocks run low.
 *      We write any inodes that have anonymous tlocks so they will become
diff --git a/fs/jfs/jfs_txnmgr.h b/fs/jfs/jfs_txnmgr.h
index 7863cf21afca..ab7288937019 100644
--- a/fs/jfs/jfs_txnmgr.h
+++ b/fs/jfs/jfs_txnmgr.h
@@ -94,7 +94,7 @@ extern struct tblock *TxBlock;	/* transaction block table */
 */
 struct tlock {
        lid_t next;             /* 2: index next lockword on tid locklist
-                                 *          next lockword on freelist
+                                 *          next lockword on freelist
                                 */
        tid_t tid;              /* 2: transaction id holding lock */
diff --git a/fs/jfs/jfs_types.h b/fs/jfs/jfs_types.h
index 09b252958687..649f9817accd 100644
--- a/fs/jfs/jfs_types.h
+++ b/fs/jfs/jfs_types.h
@@ -21,7 +21,7 @@
 /*
 *      jfs_types.h:
 *
- * basic type/utility  definitions
+ * basic type/utility definitions
 *
 * note: this header file must be the 1st include file
 * of JFS include list in all JFS .c file.
@@ -54,8 +54,8 @@ struct timestruc_t {
 */
 #define LEFTMOSTONE     0x80000000
-#define HIGHORDER       0x80000000u     /* high order bit on            */
+#define HIGHORDER       0x80000000u     /* high order bit on    */
-#define ONES            0xffffffffu     /* all bit on                   */
+#define ONES            0xffffffffu     /* all bit on           */
 /*
 *      logical xd (lxd)
@@ -148,7 +148,7 @@ typedef struct {
 #define sizeDXD(dxd)    le32_to_cpu((dxd)->size)
 /*
- *      directory entry argument
+ *      directory entry argument
 */
 struct component_name {
        int namlen;
@@ -160,14 +160,14 @@ struct component_name {
 *      DASD limit information - stored in directory inode
 */
 struct dasd {
-        u8 thresh;              /* Alert Threshold (in percent) */
+        u8 thresh;              /* Alert Threshold (in percent)         */
-        u8 delta;               /* Alert Threshold delta (in percent)   */
+        u8 delta;               /* Alert Threshold delta (in percent)   */
        u8 rsrvd1;
-        u8 limit_hi;            /* DASD limit (in logical blocks)       */
+        u8 limit_hi;            /* DASD limit (in logical blocks)       */
-        __le32 limit_lo;        /* DASD limit (in logical blocks)       */
+        __le32 limit_lo;        /* DASD limit (in logical blocks)       */
        u8 rsrvd2[3];
-        u8 used_hi;             /* DASD usage (in logical blocks)       */
+        u8 used_hi;             /* DASD usage (in logical blocks)       */
-        __le32 used_lo;         /* DASD usage (in logical blocks)       */
+        __le32 used_lo;         /* DASD usage (in logical blocks)       */
 };
 #define DASDLIMIT(dasdp) \
diff --git a/fs/jfs/jfs_umount.c b/fs/jfs/jfs_umount.c
index a386f48c73fc..7971f37534a3 100644
--- a/fs/jfs/jfs_umount.c
+++ b/fs/jfs/jfs_umount.c
@@ -60,7 +60,7 @@ int jfs_umount(struct super_block *sb)
        jfs_info("UnMount JFS: sb:0x%p", sb);
        /*
-         *      update superblock and close log
+         *      update superblock and close log
         *
         * if mounted read-write and log based recovery was enabled
         */
diff --git a/fs/jfs/jfs_xtree.c b/fs/jfs/jfs_xtree.c
index acc97c46d8a4..1543906a2e0d 100644
--- a/fs/jfs/jfs_xtree.c
+++ b/fs/jfs/jfs_xtree.c
@@ -16,7 +16,7 @@
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
 /*
- *      jfs_xtree.c: extent allocation descriptor B+-tree manager
+ *      jfs_xtree.c: extent allocation descriptor B+-tree manager
 */
 #include <linux/fs.h>
@@ -32,30 +32,30 @@
 /*
 * xtree local flag
 */
-#define XT_INSERT       0x00000001
+#define XT_INSERT       0x00000001
 /*
- *       xtree key/entry comparison: extent offset
+ *      xtree key/entry comparison: extent offset
 *
 * return:
- *      -1: k < start of extent
+ *      -1: k < start of extent
- *       0: start_of_extent <= k <= end_of_extent
+ *       0: start_of_extent <= k <= end_of_extent
- *       1: k > end_of_extent
+ *       1: k > end_of_extent
 */
 #define XT_CMP(CMP, K, X, OFFSET64)\
 {\
-        OFFSET64 = offsetXAD(X);\
+        OFFSET64 = offsetXAD(X);\
-        (CMP) = ((K) >= OFFSET64 + lengthXAD(X)) ? 1 :\
+        (CMP) = ((K) >= OFFSET64 + lengthXAD(X)) ? 1 :\
-              ((K) < OFFSET64) ? -1 : 0;\
+                ((K) < OFFSET64) ? -1 : 0;\
 }
 /* write a xad entry */
 #define XT_PUTENTRY(XAD, FLAG, OFF, LEN, ADDR)\
 {\
-        (XAD)->flag = (FLAG);\
+        (XAD)->flag = (FLAG);\
-        XADoffset((XAD), (OFF));\
+        XADoffset((XAD), (OFF));\
-        XADlength((XAD), (LEN));\
+        XADlength((XAD), (LEN));\
-        XADaddress((XAD), (ADDR));\
+        XADaddress((XAD), (ADDR));\
 }
 #define XT_PAGE(IP, MP) BT_PAGE(IP, MP, xtpage_t, i_xtroot)
@@ -76,13 +76,13 @@
                        MP = NULL;\
                        RC = -EIO;\
                }\
-        }\
+        }\
 }
 /* for consistency */
 #define XT_PUTPAGE(MP) BT_PUTPAGE(MP)
-#define XT_GETSEARCH(IP, LEAF, BN, MP,  P, INDEX) \
+#define XT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \
        BT_GETSEARCH(IP, LEAF, BN, MP, xtpage_t, P, INDEX, i_xtroot)
 /* xtree entry parameter descriptor */
 struct xtsplit {
@@ -97,7 +97,7 @@ struct xtsplit {
 /*
- *      statistics
+ *      statistics
 */
 #ifdef CONFIG_JFS_STATISTICS
 static struct {
@@ -136,7 +136,7 @@ static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * fp);
 #endif                          /*  _STILL_TO_PORT */
 /*
- *      xtLookup()
+ *      xtLookup()
 *
 * function: map a single page into a physical extent;
 */
@@ -179,7 +179,7 @@ int xtLookup(struct inode *ip, s64 lstart,
        }
        /*
-         *      compute the physical extent covering logical extent
+         *      compute the physical extent covering logical extent
         *
         * N.B. search may have failed (e.g., hole in sparse file),
         * and returned the index of the next entry.
@@ -220,27 +220,27 @@ int xtLookup(struct inode *ip, s64 lstart,
 /*
- *      xtLookupList()
+ *      xtLookupList()
 *
 * function: map a single logical extent into a list of physical extent;
 *
 * parameter:
- *      struct inode    *ip,
+ *      struct inode    *ip,
- *      struct lxdlist  *lxdlist,       lxd list (in)
+ *      struct lxdlist  *lxdlist,       lxd list (in)
- *      struct xadlist  *xadlist,       xad list (in/out)
+ *      struct xadlist  *xadlist,       xad list (in/out)
- *      int             flag)
+ *      int             flag)
 *
 * coverage of lxd by xad under assumption of
 * . lxd's are ordered and disjoint.
 * . xad's are ordered and disjoint.
 *
 * return:
- *      0:      success
+ *      0:      success
 *
 * note: a page being written (even a single byte) is backed fully,
- *      except the last page which is only backed with blocks
+ *      except the last page which is only backed with blocks
- *      required to cover the last byte;
+ *      required to cover the last byte;
- *      the extent backing a page is fully contained within an xad;
+ *      the extent backing a page is fully contained within an xad;
 */
 int xtLookupList(struct inode *ip, struct lxdlist * lxdlist,
                 struct xadlist * xadlist, int flag)
@@ -284,7 +284,7 @@ int xtLookupList(struct inode *ip, struct lxdlist * lxdlist,
                return rc;
        /*
-         *      compute the physical extent covering logical extent
+         *      compute the physical extent covering logical extent
         *
         * N.B. search may have failed (e.g., hole in sparse file),
         * and returned the index of the next entry.
@@ -343,7 +343,7 @@ int xtLookupList(struct inode *ip, struct lxdlist * lxdlist,
                if (lstart >= size)
                        goto mapend;
-                /* compare with the current xad  */
+                /* compare with the current xad */
                goto compare1;
        }
        /* lxd is covered by xad */
@@ -430,7 +430,7 @@ int xtLookupList(struct inode *ip, struct lxdlist * lxdlist,
        /*
         * lxd is partially covered by xad
         */
-        else {                  /* (xend < lend)  */
+        else {                  /* (xend < lend) */
                /*
                 * get next xad
@@ -477,22 +477,22 @@ int xtLookupList(struct inode *ip, struct lxdlist * lxdlist,
 /*
- *      xtSearch()
+ *      xtSearch()
 *
- * function:    search for the xad entry covering specified offset.
+ * function:    search for the xad entry covering specified offset.
 *
 * parameters:
- *      ip      - file object;
+ *      ip      - file object;
- *      xoff    - extent offset;
+ *      xoff    - extent offset;
- *      nextp   - address of next extent (if any) for search miss
+ *      nextp   - address of next extent (if any) for search miss
- *      cmpp    - comparison result:
+ *      cmpp    - comparison result:
- *      btstack - traverse stack;
+ *      btstack - traverse stack;
- *      flag    - search process flag (XT_INSERT);
+ *      flag    - search process flag (XT_INSERT);
 *
 * returns:
- *      btstack contains (bn, index) of search path traversed to the entry.
+ *      btstack contains (bn, index) of search path traversed to the entry.
- *      *cmpp is set to result of comparison with the entry returned.
+ *      *cmpp is set to result of comparison with the entry returned.
- *      the page containing the entry is pinned at exit.
+ *      the page containing the entry is pinned at exit.
 */
 static int xtSearch(struct inode *ip, s64 xoff, s64 *nextp,
                    int *cmpp, struct btstack * btstack, int flag)
@@ -517,7 +517,7 @@ static int xtSearch(struct inode *ip, s64 xoff,	s64 *nextp,
        btstack->nsplit = 0;
        /*
-         *      search down tree from root:
+         *      search down tree from root:
         *
         * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of
         * internal page, child page Pi contains entry with k, Ki <= K < Kj.
@@ -642,7 +642,7 @@ static int xtSearch(struct inode *ip, s64 xoff,	s64 *nextp,
                        XT_CMP(cmp, xoff, &p->xad[index], t64);
                        if (cmp == 0) {
                                /*
-                                 *      search hit
+                                 *      search hit
                                 */
                                /* search hit - leaf page:
                                 * return the entry found
@@ -692,7 +692,7 @@ static int xtSearch(struct inode *ip, s64 xoff,	s64 *nextp,
                }
                /*
-                 *      search miss
+                 *      search miss
                 *
                 * base is the smallest index with key (Kj) greater than
                 * search key (K) and may be zero or maxentry index.
@@ -773,22 +773,22 @@ static int xtSearch(struct inode *ip, s64 xoff,	s64 *nextp,
 }
 /*
- *      xtInsert()
+ *      xtInsert()
 *
 * function:
 *
 * parameter:
- *      tid     - transaction id;
+ *      tid     - transaction id;
- *      ip      - file object;
+ *      ip      - file object;
- *      xflag   - extent flag (XAD_NOTRECORDED):
+ *      xflag   - extent flag (XAD_NOTRECORDED):
- *      xoff    - extent offset;
+ *      xoff    - extent offset;
- *      xlen    - extent length;
+ *      xlen    - extent length;
- *      xaddrp  - extent address pointer (in/out):
+ *      xaddrp  - extent address pointer (in/out):
- *              if (*xaddrp)
+ *              if (*xaddrp)
- *                      caller allocated data extent at *xaddrp;
+ *                      caller allocated data extent at *xaddrp;
- *              else
+ *              else
- *                      allocate data extent and return its xaddr;
+ *                      allocate data extent and return its xaddr;
- *      flag    -
+ *      flag    -
 *
 * return:
 */
@@ -813,7 +813,7 @@ int xtInsert(tid_t tid,		/* transaction id */
        jfs_info("xtInsert: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen);
        /*
-         *      search for the entry location at which to insert:
+         *      search for the entry location at which to insert:
         *
         * xtFastSearch() and xtSearch() both returns (leaf page
         * pinned, index at which to insert).
@@ -853,13 +853,13 @@ int xtInsert(tid_t tid,		/* transaction id */
        }
        /*
-         *      insert entry for new extent
+         *      insert entry for new extent
         */
        xflag |= XAD_NEW;
        /*
-         *      if the leaf page is full, split the page and
+         *      if the leaf page is full, split the page and
-         *      propagate up the router entry for the new page from split
+         *      propagate up the router entry for the new page from split
         *
         * The xtSplitUp() will insert the entry and unpin the leaf page.
         */
@@ -886,7 +886,7 @@ int xtInsert(tid_t tid,		/* transaction id */
        }
        /*
-         *      insert the new entry into the leaf page
+         *      insert the new entry into the leaf page
         */
        /*
         * acquire a transaction lock on the leaf page;
@@ -930,16 +930,16 @@ int xtInsert(tid_t tid,		/* transaction id */
 /*
- *      xtSplitUp()
+ *      xtSplitUp()
 *
 * function:
- *      split full pages as propagating insertion up the tree
+ *      split full pages as propagating insertion up the tree
 *
 * parameter:
- *      tid     - transaction id;
+ *      tid     - transaction id;
- *      ip      - file object;
+ *      ip      - file object;
- *      split   - entry parameter descriptor;
+ *      split   - entry parameter descriptor;
- *      btstack - traverse stack from xtSearch()
+ *      btstack - traverse stack from xtSearch()
 *
 * return:
 */
@@ -1199,22 +1199,22 @@ xtSplitUp(tid_t tid,
 /*
- *      xtSplitPage()
+ *      xtSplitPage()
 *
 * function:
- *      split a full non-root page into
+ *      split a full non-root page into
- *      original/split/left page and new right page
+ *      original/split/left page and new right page
- *      i.e., the original/split page remains as left page.
+ *      i.e., the original/split page remains as left page.
 *
 * parameter:
- *      int             tid,
+ *      int             tid,
- *      struct inode    *ip,
+ *      struct inode    *ip,
- *      struct xtsplit  *split,
+ *      struct xtsplit  *split,
- *      struct metapage **rmpp,
+ *      struct metapage **rmpp,
- *      u64             *rbnp,
+ *      u64             *rbnp,
 *
 * return:
- *      Pointer to page in which to insert or NULL on error.
+ *      Pointer to page in which to insert or NULL on error.
 */
 static int
 xtSplitPage(tid_t tid, struct inode *ip,
@@ -1248,9 +1248,9 @@ xtSplitPage(tid_t tid, struct inode *ip,
        rbn = addressPXD(pxd);
        /* Allocate blocks to quota. */
-       if (DQUOT_ALLOC_BLOCK(ip, lengthPXD(pxd))) {
+        if (DQUOT_ALLOC_BLOCK(ip, lengthPXD(pxd))) {
-               rc = -EDQUOT;
+                rc = -EDQUOT;
-               goto clean_up;
+                goto clean_up;
        }
        quota_allocation += lengthPXD(pxd);
@@ -1304,7 +1304,7 @@ xtSplitPage(tid_t tid, struct inode *ip,
        skip = split->index;
        /*
-         *      sequential append at tail (after last entry of last page)
+         *      sequential append at tail (after last entry of last page)
         *
         * if splitting the last page on a level because of appending
         * a entry to it (skip is maxentry), it's likely that the access is
@@ -1342,7 +1342,7 @@ xtSplitPage(tid_t tid, struct inode *ip,
        }
        /*
-         *      non-sequential insert (at possibly middle page)
+         *      non-sequential insert (at possibly middle page)
         */
        /*
@@ -1465,25 +1465,24 @@ xtSplitPage(tid_t tid, struct inode *ip,
 /*
- *      xtSplitRoot()
+ *      xtSplitRoot()
 *
 * function:
- *      split the full root page into
+ *      split the full root page into original/root/split page and new
- *      original/root/split page and new right page
+ *      right page
- *      i.e., root remains fixed in tree anchor (inode) and
+ *      i.e., root remains fixed in tree anchor (inode) and the root is
- *      the root is copied to a single new right child page
+ *      copied to a single new right child page since root page <<
- *      since root page << non-root page, and
+ *      non-root page, and the split root page contains a single entry
- *      the split root page contains a single entry for the
+ *      for the new right child page.
- *      new right child page.
 *
 * parameter:
- *      int             tid,
+ *      int             tid,
- *      struct inode    *ip,
+ *      struct inode    *ip,
- *      struct xtsplit  *split,
+ *      struct xtsplit  *split,
- *      struct metapage **rmpp)
+ *      struct metapage **rmpp)
 *
 * return:
- *      Pointer to page in which to insert or NULL on error.
+ *      Pointer to page in which to insert or NULL on error.
 */
 static int
 xtSplitRoot(tid_t tid,
@@ -1505,7 +1504,7 @@ xtSplitRoot(tid_t tid,
        INCREMENT(xtStat.split);
        /*
-         *      allocate a single (right) child page
+         *      allocate a single (right) child page
         */
        pxdlist = split->pxdlist;
        pxd = &pxdlist->pxd[pxdlist->npxd];
@@ -1573,7 +1572,7 @@ xtSplitRoot(tid_t tid,
        }
        /*
-         *      reset the root
+         *      reset the root
         *
         * init root with the single entry for the new right page
         * set the 1st entry offset to 0, which force the left-most key
@@ -1610,7 +1609,7 @@ xtSplitRoot(tid_t tid,
 /*
- *      xtExtend()
+ *      xtExtend()
 *
 * function: extend in-place;
 *
@@ -1677,7 +1676,7 @@ int xtExtend(tid_t tid,		/* transaction id */
                goto extendOld;
        /*
-         *      extent overflow: insert entry for new extent
+         *      extent overflow: insert entry for new extent
         */
 //insertNew:
        xoff = offsetXAD(xad) + MAXXLEN;
@@ -1685,8 +1684,8 @@ int xtExtend(tid_t tid,		/* transaction id */
        nextindex = le16_to_cpu(p->header.nextindex);
        /*
-         *      if the leaf page is full, insert the new entry and
+         *      if the leaf page is full, insert the new entry and
-         *      propagate up the router entry for the new page from split
+         *      propagate up the router entry for the new page from split
         *
         * The xtSplitUp() will insert the entry and unpin the leaf page.
         */
@@ -1731,7 +1730,7 @@ int xtExtend(tid_t tid,		/* transaction id */
                }
        }
        /*
-         *      insert the new entry into the leaf page
+         *      insert the new entry into the leaf page
         */
        else {
                /* insert the new entry: mark the entry NEW */
@@ -1771,11 +1770,11 @@ int xtExtend(tid_t tid,		/* transaction id */
 #ifdef _NOTYET
 /*
- *      xtTailgate()
+ *      xtTailgate()
 *
 * function: split existing 'tail' extent
- *      (split offset >= start offset of tail extent), and
+ *      (split offset >= start offset of tail extent), and
- *      relocate and extend the split tail half;
+ *      relocate and extend the split tail half;
 *
 * note: existing extent may or may not have been committed.
 * caller is responsible for pager buffer cache update, and
@@ -1804,7 +1803,7 @@ int xtTailgate(tid_t tid,		/* transaction id */
 /*
 printf("xtTailgate: nxoff:0x%lx nxlen:0x%x nxaddr:0x%lx\n",
-        (ulong)xoff, xlen, (ulong)xaddr);
+        (ulong)xoff, xlen, (ulong)xaddr);
 */
        /* there must exist extent to be tailgated */
@@ -1842,18 +1841,18 @@ printf("xtTailgate: nxoff:0x%lx nxlen:0x%x nxaddr:0x%lx\n",
        xad = &p->xad[index];
 /*
 printf("xtTailgate: xoff:0x%lx xlen:0x%x xaddr:0x%lx\n",
-        (ulong)offsetXAD(xad), lengthXAD(xad), (ulong)addressXAD(xad));
+        (ulong)offsetXAD(xad), lengthXAD(xad), (ulong)addressXAD(xad));
 */
        if ((llen = xoff - offsetXAD(xad)) == 0)
                goto updateOld;
        /*
-         *      partially replace extent: insert entry for new extent
+         *      partially replace extent: insert entry for new extent
         */
 //insertNew:
        /*
-         *      if the leaf page is full, insert the new entry and
+         *      if the leaf page is full, insert the new entry and
-         *      propagate up the router entry for the new page from split
+         *      propagate up the router entry for the new page from split
         *
         * The xtSplitUp() will insert the entry and unpin the leaf page.
         */
@@ -1898,7 +1897,7 @@ printf("xtTailgate: xoff:0x%lx xlen:0x%x xaddr:0x%lx\n",
                }
        }
        /*
-         *      insert the new entry into the leaf page
+         *      insert the new entry into the leaf page
         */
        else {
                /* insert the new entry: mark the entry NEW */
@@ -1955,17 +1954,17 @@ printf("xtTailgate: xoff:0x%lx xlen:0x%x xaddr:0x%lx\n",
 #endif /* _NOTYET */
 /*
- *      xtUpdate()
+ *      xtUpdate()
 *
 * function: update XAD;
 *
- *      update extent for allocated_but_not_recorded or
+ *      update extent for allocated_but_not_recorded or
- *      compressed extent;
+ *      compressed extent;
 *
 * parameter:
- *      nxad    - new XAD;
+ *      nxad    - new XAD;
- *                logical extent of the specified XAD must be completely
+ *              logical extent of the specified XAD must be completely
- *                contained by an existing XAD;
+ *              contained by an existing XAD;
 */
 int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad)
 {                               /* new XAD */
@@ -2416,19 +2415,19 @@ printf("xtUpdate.updateLeft.split p:0x%p\n", p);
 /*
- *      xtAppend()
+ *      xtAppend()
 *
 * function: grow in append mode from contiguous region specified ;
 *
 * parameter:
- *      tid             - transaction id;
+ *      tid             - transaction id;
- *      ip              - file object;
+ *      ip              - file object;
- *      xflag           - extent flag:
+ *      xflag           - extent flag:
- *      xoff            - extent offset;
+ *      xoff            - extent offset;
- *      maxblocks       - max extent length;
+ *      maxblocks       - max extent length;
- *      xlen            - extent length (in/out);
+ *      xlen            - extent length (in/out);
- *      xaddrp          - extent address pointer (in/out):
+ *      xaddrp          - extent address pointer (in/out):
- *      flag            -
+ *      flag            -
 *
 * return:
 */
@@ -2460,7 +2459,7 @@ int xtAppend(tid_t tid,		/* transaction id */
                 (ulong) xoff, maxblocks, xlen, (ulong) xaddr);
        /*
-         *      search for the entry location at which to insert:
+         *      search for the entry location at which to insert:
         *
         * xtFastSearch() and xtSearch() both returns (leaf page
         * pinned, index at which to insert).
@@ -2482,13 +2481,13 @@ int xtAppend(tid_t tid,		/* transaction id */
                xlen = min(xlen, (int)(next - xoff));
 //insert:
        /*
-         *      insert entry for new extent
+         *      insert entry for new extent
         */
        xflag |= XAD_NEW;
        /*
-         *      if the leaf page is full, split the page and
+         *      if the leaf page is full, split the page and
-         *      propagate up the router entry for the new page from split
+         *      propagate up the router entry for the new page from split
         *
         * The xtSplitUp() will insert the entry and unpin the leaf page.
         */
@@ -2545,7 +2544,7 @@ int xtAppend(tid_t tid,		/* transaction id */
        return 0;
        /*
-         *      insert the new entry into the leaf page
+         *      insert the new entry into the leaf page
         */
      insertLeaf:
        /*
@@ -2589,17 +2588,17 @@ int xtAppend(tid_t tid,		/* transaction id */
 /* - TBD for defragmentaion/reorganization -
 *
- *      xtDelete()
+ *      xtDelete()
 *
 * function:
- *      delete the entry with the specified key.
+ *      delete the entry with the specified key.
 *
- *      N.B.: whole extent of the entry is assumed to be deleted.
+ *      N.B.: whole extent of the entry is assumed to be deleted.
 *
 * parameter:
 *
 * return:
- *       ENOENT: if the entry is not found.
+ *      ENOENT: if the entry is not found.
 *
 * exception:
 */
@@ -2665,10 +2664,10 @@ int xtDelete(tid_t tid, struct inode *ip, s64 xoff, s32 xlen, int flag)
 /* - TBD for defragmentaion/reorganization -
 *
- *      xtDeleteUp()
+ *      xtDeleteUp()
 *
 * function:
- *      free empty pages as propagating deletion up the tree
+ *      free empty pages as propagating deletion up the tree
 *
 * parameter:
 *
@@ -2815,15 +2814,15 @@ xtDeleteUp(tid_t tid, struct inode *ip,
 /*
- * NAME:        xtRelocate()
+ * NAME:        xtRelocate()
 *
- * FUNCTION:    relocate xtpage or data extent of regular file;
+ * FUNCTION:    relocate xtpage or data extent of regular file;
- *              This function is mainly used by defragfs utility.
+ *              This function is mainly used by defragfs utility.
 *
- * NOTE:        This routine does not have the logic to handle
+ * NOTE:        This routine does not have the logic to handle
- *              uncommitted allocated extent. The caller should call
+ *              uncommitted allocated extent. The caller should call
- *              txCommit() to commit all the allocation before call
+ *              txCommit() to commit all the allocation before call
- *              this routine.
+ *              this routine.
 */
 int
 xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad,  /* old XAD */
@@ -2865,8 +2864,8 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad,	/* old XAD */
                 xtype, (ulong) xoff, xlen, (ulong) oxaddr, (ulong) nxaddr);
        /*
-         *      1. get and validate the parent xtpage/xad entry
+         *      1. get and validate the parent xtpage/xad entry
-         *      covering the source extent to be relocated;
+         *      covering the source extent to be relocated;
         */
        if (xtype == DATAEXT) {
                /* search in leaf entry */
@@ -2910,7 +2909,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad,	/* old XAD */
        jfs_info("xtRelocate: parent xad entry validated.");
        /*
-         *      2. relocate the extent
+         *      2. relocate the extent
         */
        if (xtype == DATAEXT) {
                /* if the extent is allocated-but-not-recorded
@@ -2923,7 +2922,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad,	/* old XAD */
                        XT_PUTPAGE(pmp);
                /*
-                 *      cmRelocate()
+                 *      cmRelocate()
                 *
                 * copy target data pages to be relocated;
                 *
@@ -2945,8 +2944,8 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad,	/* old XAD */
                pno = offset >> CM_L2BSIZE;
                npages = (nbytes + (CM_BSIZE - 1)) >> CM_L2BSIZE;
 /*
-                npages = ((offset + nbytes - 1) >> CM_L2BSIZE) -
+                npages = ((offset + nbytes - 1) >> CM_L2BSIZE) -
-                         (offset >> CM_L2BSIZE) + 1;
+                          (offset >> CM_L2BSIZE) + 1;
 */
                sxaddr = oxaddr;
                dxaddr = nxaddr;
@@ -2981,7 +2980,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad,	/* old XAD */
                XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index);
                jfs_info("xtRelocate: target data extent relocated.");
-        } else {                /* (xtype  == XTPAGE) */
+        } else {                /* (xtype == XTPAGE) */
                /*
                 * read in the target xtpage from the source extent;
@@ -3026,16 +3025,14 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad,	/* old XAD */
                 */
                if (lmp) {
                        BT_MARK_DIRTY(lmp, ip);
-                        tlck =
+                        tlck = txLock(tid, ip, lmp, tlckXTREE | tlckRELINK);
-                            txLock(tid, ip, lmp, tlckXTREE | tlckRELINK);
                        lp->header.next = cpu_to_le64(nxaddr);
                        XT_PUTPAGE(lmp);
                }
                if (rmp) {
                        BT_MARK_DIRTY(rmp, ip);
-                        tlck =
+                        tlck = txLock(tid, ip, rmp, tlckXTREE | tlckRELINK);
-                            txLock(tid, ip, rmp, tlckXTREE | tlckRELINK);
                        rp->header.prev = cpu_to_le64(nxaddr);
                        XT_PUTPAGE(rmp);
                }
@@ -3062,7 +3059,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad,	/* old XAD */
                 * scan may be skipped by commit() and logredo();
                 */
                BT_MARK_DIRTY(mp, ip);
-                /* tlckNEW init  xtlck->lwm.offset = XTENTRYSTART; */
+                /* tlckNEW init xtlck->lwm.offset = XTENTRYSTART; */
                tlck = txLock(tid, ip, mp, tlckXTREE | tlckNEW);
                xtlck = (struct xtlock *) & tlck->lock;
@@ -3084,7 +3081,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad,	/* old XAD */
        }
        /*
-         *      3. acquire maplock for the source extent to be freed;
+         *      3. acquire maplock for the source extent to be freed;
         *
         * acquire a maplock saving the src relocated extent address;
         * to free of the extent at commit time;
@@ -3105,7 +3102,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad,	/* old XAD */
         *      is no buffer associated with this lock since the buffer
         *      has been redirected to the target location.
         */
-        else                    /* (xtype  == XTPAGE) */
+        else                    /* (xtype == XTPAGE) */
                tlck = txMaplock(tid, ip, tlckMAP | tlckRELOCATE);
        pxdlock = (struct pxd_lock *) & tlck->lock;
@@ -3115,7 +3112,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad,	/* old XAD */
        pxdlock->index = 1;
        /*
-         *      4. update the parent xad entry for relocation;
+         *      4. update the parent xad entry for relocation;
         *
         * acquire tlck for the parent entry with XAD_NEW as entry
         * update which will write LOG_REDOPAGE and update bmap for
@@ -3143,22 +3140,22 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad,	/* old XAD */
 /*
- *      xtSearchNode()
+ *      xtSearchNode()
 *
- * function:    search for the internal xad entry covering specified extent.
+ * function:    search for the internal xad entry covering specified extent.
- *              This function is mainly used by defragfs utility.
+ *              This function is mainly used by defragfs utility.
 *
 * parameters:
- *      ip      - file object;
+ *      ip      - file object;
- *      xad     - extent to find;
+ *      xad     - extent to find;
- *      cmpp    - comparison result:
+ *      cmpp    - comparison result:
- *      btstack - traverse stack;
+ *      btstack - traverse stack;
- *      flag    - search process flag;
+ *      flag    - search process flag;
 *
 * returns:
- *      btstack contains (bn, index) of search path traversed to the entry.
+ *      btstack contains (bn, index) of search path traversed to the entry.
- *      *cmpp is set to result of comparison with the entry returned.
+ *      *cmpp is set to result of comparison with the entry returned.
- *      the page containing the entry is pinned at exit.
+ *      the page containing the entry is pinned at exit.
 */
 static int xtSearchNode(struct inode *ip, xad_t * xad,  /* required XAD entry */
                        int *cmpp, struct btstack * btstack, int flag)
@@ -3181,7 +3178,7 @@ static int xtSearchNode(struct inode *ip, xad_t * xad,	/* required XAD entry */
        xaddr = addressXAD(xad);
        /*
-         *      search down tree from root:
+         *      search down tree from root:
         *
         * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of
         * internal page, child page Pi contains entry with k, Ki <= K < Kj.
@@ -3217,7 +3214,7 @@ static int xtSearchNode(struct inode *ip, xad_t * xad,	/* required XAD entry */
                        XT_CMP(cmp, xoff, &p->xad[index], t64);
                        if (cmp == 0) {
                                /*
-                                 *      search hit
+                                 *      search hit
                                 *
                                 * verify for exact match;
                                 */
@@ -3245,7 +3242,7 @@ static int xtSearchNode(struct inode *ip, xad_t * xad,	/* required XAD entry */
                }
                /*
-                 *      search miss - non-leaf page:
+                 *      search miss - non-leaf page:
                 *
                 * base is the smallest index with key (Kj) greater than
                 * search key (K) and may be zero or maxentry index.
@@ -3268,15 +3265,15 @@ static int xtSearchNode(struct inode *ip, xad_t * xad,	/* required XAD entry */
 /*
- *      xtRelink()
+ *      xtRelink()
 *
 * function:
- *      link around a freed page.
+ *      link around a freed page.
 *
 * Parameter:
- *      int           tid,
+ *      int             tid,
- *      struct inode    *ip,
+ *      struct inode    *ip,
- *      xtpage_t        *p)
+ *      xtpage_t        *p)
 *
 * returns:
 */
@@ -3338,7 +3335,7 @@ static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * p)
 /*
- *      xtInitRoot()
+ *      xtInitRoot()
 *
 * initialize file root (inline in inode)
 */
@@ -3385,42 +3382,42 @@ void xtInitRoot(tid_t tid, struct inode *ip)
 #define MAX_TRUNCATE_LEAVES 50
 /*
- *      xtTruncate()
+ *      xtTruncate()
 *
 * function:
- *      traverse for truncation logging backward bottom up;
+ *      traverse for truncation logging backward bottom up;
- *      terminate at the last extent entry at the current subtree
+ *      terminate at the last extent entry at the current subtree
- *      root page covering new down size.
+ *      root page covering new down size.
- *      truncation may occur within the last extent entry.
+ *      truncation may occur within the last extent entry.
 *
 * parameter:
- *      int           tid,
+ *      int             tid,
- *      struct inode    *ip,
+ *      struct inode    *ip,
- *      s64           newsize,
+ *      s64             newsize,
- *      int           type)   {PWMAP, PMAP, WMAP; DELETE, TRUNCATE}
+ *      int             type)   {PWMAP, PMAP, WMAP; DELETE, TRUNCATE}
 *
 * return:
 *
 * note:
- *      PWMAP:
+ *      PWMAP:
- *       1. truncate (non-COMMIT_NOLINK file)
+ *       1. truncate (non-COMMIT_NOLINK file)
- *          by jfs_truncate() or jfs_open(O_TRUNC):
+ *          by jfs_truncate() or jfs_open(O_TRUNC):
- *          xtree is updated;
+ *          xtree is updated;
 *       2. truncate index table of directory when last entry removed
- *       map update via tlock at commit time;
+ *      map update via tlock at commit time;
- *      PMAP:
+ *      PMAP:
 *       Call xtTruncate_pmap instead
- *      WMAP:
+ *      WMAP:
- *       1. remove (free zero link count) on last reference release
+ *       1. remove (free zero link count) on last reference release
- *          (pmap has been freed at commit zero link count);
+ *          (pmap has been freed at commit zero link count);
- *       2. truncate (COMMIT_NOLINK file, i.e., tmp file):
+ *       2. truncate (COMMIT_NOLINK file, i.e., tmp file):
- *          xtree is updated;
+ *          xtree is updated;
- *       map update directly at truncation time;
+ *       map update directly at truncation time;
 *
- *      if (DELETE)
+ *      if (DELETE)
- *              no LOG_NOREDOPAGE is required (NOREDOFILE is sufficient);
+ *              no LOG_NOREDOPAGE is required (NOREDOFILE is sufficient);
- *      else if (TRUNCATE)
+ *      else if (TRUNCATE)
- *              must write LOG_NOREDOPAGE for deleted index page;
+ *              must write LOG_NOREDOPAGE for deleted index page;
 *
 * pages may already have been tlocked by anonymous transactions
 * during file growth (i.e., write) before truncation;
@@ -3493,7 +3490,7 @@ s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag)
         * retained in the new sized file.
         * if type is PMAP, the data and index pages are NOT
         * freed, and the data and index blocks are NOT freed
-         * from  working map.
+         * from working map.
         * (this will allow continued access of data/index of
         * temporary file (zerolink count file truncated to zero-length)).
         */
@@ -3542,7 +3539,7 @@ s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag)
                goto getChild;
        /*
-         *      leaf page
+         *      leaf page
         */
        freed = 0;
@@ -3916,7 +3913,7 @@ s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag)
        }
        /*
-         *      internal page: go down to child page of current entry
+         *      internal page: go down to child page of current entry
         */
      getChild:
        /* save current parent entry for the child page */
@@ -3965,7 +3962,7 @@ s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag)
 /*
- *      xtTruncate_pmap()
+ *      xtTruncate_pmap()
 *
 * function:
 *      Perform truncate to zero lenghth for deleted file, leaving the
@@ -3974,9 +3971,9 @@ s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag)
 *      is committed to disk.
 *
 * parameter:
- *      tid_t           tid,
+ *      tid_t           tid,
- *      struct inode    *ip,
+ *      struct inode    *ip,
- *      s64             committed_size)
+ *      s64             committed_size)
 *
 * return: new committed size
 *
@@ -4050,7 +4047,7 @@ s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size)
        }
        /*
-         *      leaf page
+         *      leaf page
         */
        if (++locked_leaves > MAX_TRUNCATE_LEAVES) {
@@ -4062,7 +4059,7 @@ s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size)
                xoff = offsetXAD(xad);
                xlen = lengthXAD(xad);
                XT_PUTPAGE(mp);
-                return  (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize;
+                return (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize;
        }
        tlck = txLock(tid, ip, mp, tlckXTREE);
        tlck->type = tlckXTREE | tlckFREE;
@@ -4099,8 +4096,7 @@ s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size)
                 */
                tlck = txLock(tid, ip, mp, tlckXTREE);
                xtlck = (struct xtlock *) & tlck->lock;
-                xtlck->hwm.offset =
+                xtlck->hwm.offset = le16_to_cpu(p->header.nextindex) - 1;
-                    le16_to_cpu(p->header.nextindex) - 1;
                tlck->type = tlckXTREE | tlckFREE;
                XT_PUTPAGE(mp);
@@ -4118,7 +4114,7 @@ s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size)
        else
                index--;
        /*
-         *      internal page: go down to child page of current entry
+         *      internal page: go down to child page of current entry
         */
      getChild:
        /* save current parent entry for the child page */
diff --git a/fs/jfs/jfs_xtree.h b/fs/jfs/jfs_xtree.h
index 164f6f2b1019..70815c8a3d6a 100644
--- a/fs/jfs/jfs_xtree.h
+++ b/fs/jfs/jfs_xtree.h
@@ -19,14 +19,14 @@
 #define _H_JFS_XTREE
 /*
- *      jfs_xtree.h: extent allocation descriptor B+-tree manager
+ *      jfs_xtree.h: extent allocation descriptor B+-tree manager
 */
 #include "jfs_btree.h"
 /*
- *      extent allocation descriptor (xad)
+ *      extent allocation descriptor (xad)
 */
 typedef struct xad {
        unsigned flag:8;        /* 1: flag */
@@ -38,30 +38,30 @@ typedef struct xad {
        __le32 addr2;           /* 4: address in unit of fsblksize */
 } xad_t;                        /* (16) */
-#define MAXXLEN         ((1 << 24) - 1)
+#define MAXXLEN         ((1 << 24) - 1)
-#define XTSLOTSIZE      16
+#define XTSLOTSIZE      16
-#define L2XTSLOTSIZE    4
+#define L2XTSLOTSIZE    4
 /* xad_t field construction */
 #define XADoffset(xad, offset64)\
 {\
-        (xad)->off1 = ((u64)offset64) >> 32;\
+        (xad)->off1 = ((u64)offset64) >> 32;\
-        (xad)->off2 = __cpu_to_le32((offset64) & 0xffffffff);\
+        (xad)->off2 = __cpu_to_le32((offset64) & 0xffffffff);\
 }
 #define XADaddress(xad, address64)\
 {\
-        (xad)->addr1 = ((u64)address64) >> 32;\
+        (xad)->addr1 = ((u64)address64) >> 32;\
-        (xad)->addr2 = __cpu_to_le32((address64) & 0xffffffff);\
+        (xad)->addr2 = __cpu_to_le32((address64) & 0xffffffff);\
 }
-#define XADlength(xad, length32)        (xad)->len = __cpu_to_le24(length32)
+#define XADlength(xad, length32)        (xad)->len = __cpu_to_le24(length32)
 /* xad_t field extraction */
 #define offsetXAD(xad)\
-        ( ((s64)((xad)->off1)) << 32 | __le32_to_cpu((xad)->off2))
+        ( ((s64)((xad)->off1)) << 32 | __le32_to_cpu((xad)->off2))
 #define addressXAD(xad)\
-        ( ((s64)((xad)->addr1)) << 32 | __le32_to_cpu((xad)->addr2))
+        ( ((s64)((xad)->addr1)) << 32 | __le32_to_cpu((xad)->addr2))
-#define lengthXAD(xad)  __le24_to_cpu((xad)->len)
+#define lengthXAD(xad)  __le24_to_cpu((xad)->len)
 /* xad list */
 struct xadlist {
@@ -71,22 +71,22 @@ struct xadlist {
 };
 /* xad_t flags */
-#define XAD_NEW         0x01    /* new */
+#define XAD_NEW         0x01    /* new */
-#define XAD_EXTENDED    0x02    /* extended */
+#define XAD_EXTENDED    0x02    /* extended */
-#define XAD_COMPRESSED  0x04    /* compressed with recorded length */
+#define XAD_COMPRESSED  0x04    /* compressed with recorded length */
 #define XAD_NOTRECORDED 0x08    /* allocated but not recorded */
-#define XAD_COW         0x10    /* copy-on-write */
+#define XAD_COW         0x10    /* copy-on-write */
 /* possible values for maxentry */
-#define XTROOTINITSLOT_DIR  6
+#define XTROOTINITSLOT_DIR 6
-#define XTROOTINITSLOT  10
+#define XTROOTINITSLOT  10
-#define XTROOTMAXSLOT   18
+#define XTROOTMAXSLOT   18
-#define XTPAGEMAXSLOT   256
+#define XTPAGEMAXSLOT   256
-#define XTENTRYSTART    2
+#define XTENTRYSTART    2
 /*
- *      xtree page:
+ *      xtree page:
 */
 typedef union {
        struct xtheader {
@@ -106,7 +106,7 @@ typedef union {
 } xtpage_t;
 /*
- *      external declaration
+ *      external declaration
 */
 extern int xtLookup(struct inode *ip, s64 lstart, s64 llen,
                    int *pflag, s64 * paddr, int *plen, int flag);
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 41c204771262..25161c4121e4 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -328,7 +328,7 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode)
 *              dentry  - child directory dentry
 *
 * RETURN:      -EINVAL - if name is . or ..
- *              -EINVAL  - if . or .. exist but are invalid.
+ *              -EINVAL - if . or .. exist but are invalid.
 *              errors from subroutines
 *
 * note:
@@ -517,7 +517,7 @@ static int jfs_unlink(struct inode *dip, struct dentry *dentry)
        inode_dec_link_count(ip);
        /*
-         *      commit zero link count object
+         *      commit zero link count object
         */
        if (ip->i_nlink == 0) {
                assert(!test_cflag(COMMIT_Nolink, ip));
@@ -596,7 +596,7 @@ static int jfs_unlink(struct inode *dip, struct dentry *dentry)
 /*
 * NAME:        commitZeroLink()
 *
- * FUNCTION:    for non-directory, called by jfs_remove(),
+ * FUNCTION:    for non-directory, called by jfs_remove(),
 *              truncate a regular file, directory or symbolic
 *              link to zero length. return 0 if type is not
 *              one of these.
@@ -676,7 +676,7 @@ static s64 commitZeroLink(tid_t tid, struct inode *ip)
 /*
 * NAME:        jfs_free_zero_link()
 *
- * FUNCTION:    for non-directory, called by iClose(),
+ * FUNCTION:    for non-directory, called by iClose(),
 *              free resources of a file from cache and WORKING map
 *              for a file previously committed with zero link count
 *              while associated with a pager object,
@@ -855,12 +855,12 @@ static int jfs_link(struct dentry *old_dentry,
 * NAME:        jfs_symlink(dip, dentry, name)
 *
 * FUNCTION:    creates a symbolic link to <symlink> by name <name>
- *                      in directory <dip>
+ *                      in directory <dip>
 *
- * PARAMETER:   dip         - parent directory vnode
+ * PARAMETER:   dip     - parent directory vnode
- *                      dentry  - dentry of symbolic link
+ *              dentry  - dentry of symbolic link
- *                      name    - the path name of the existing object
+ *              name    - the path name of the existing object
- *                                    that will be the source of the link
+ *                        that will be the source of the link
 *
 * RETURN:      errors from subroutines
 *
@@ -1052,9 +1052,9 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
 /*
- * NAME:        jfs_rename
+ * NAME:        jfs_rename
 *
- * FUNCTION:    rename a file or directory
+ * FUNCTION:    rename a file or directory
 */
 static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry,
               struct inode *new_dir, struct dentry *new_dentry)
@@ -1331,9 +1331,9 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 /*
- * NAME:        jfs_mknod
+ * NAME:        jfs_mknod
 *
- * FUNCTION:    Create a special file (device)
+ * FUNCTION:    Create a special file (device)
 */
 static int jfs_mknod(struct inode *dir, struct dentry *dentry,
                int mode, dev_t rdev)
diff --git a/fs/jfs/resize.c b/fs/jfs/resize.c
index 79d625f3f733..71984ee95346 100644
--- a/fs/jfs/resize.c
+++ b/fs/jfs/resize.c
@@ -29,17 +29,17 @@
 #include "jfs_txnmgr.h"
 #include "jfs_debug.h"
-#define BITSPERPAGE     (PSIZE << 3)
+#define BITSPERPAGE     (PSIZE << 3)
-#define L2MEGABYTE      20
+#define L2MEGABYTE      20
-#define MEGABYTE        (1 << L2MEGABYTE)
+#define MEGABYTE        (1 << L2MEGABYTE)
-#define MEGABYTE32     (MEGABYTE << 5)
+#define MEGABYTE32      (MEGABYTE << 5)
 /* convert block number to bmap file page number */
 #define BLKTODMAPN(b)\
-        (((b) >> 13) + ((b) >> 23) + ((b) >> 33) + 3 + 1)
+        (((b) >> 13) + ((b) >> 23) + ((b) >> 33) + 3 + 1)
 /*
- *      jfs_extendfs()
+ *      jfs_extendfs()
 *
 * function: extend file system;
 *
@@ -48,9 +48,9 @@
 *                                   workspace  space
 *
 * input:
- *      new LVSize: in LV blocks (required)
+ *      new LVSize: in LV blocks (required)
- *      new LogSize: in LV blocks (optional)
+ *      new LogSize: in LV blocks (optional)
- *      new FSSize: in LV blocks (optional)
+ *      new FSSize: in LV blocks (optional)
 *
 * new configuration:
 * 1. set new LogSize as specified or default from new LVSize;
@@ -125,8 +125,8 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
        }
        /*
-         *      reconfigure LV spaces
+         *      reconfigure LV spaces
-         *      ---------------------
+         *      ---------------------
         *
         * validate new size, or, if not specified, determine new size
         */
@@ -198,7 +198,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
                log_formatted = 1;
        }
        /*
-         *      quiesce file system
+         *      quiesce file system
         *
         * (prepare to move the inline log and to prevent map update)
         *
@@ -270,8 +270,8 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
        }
        /*
-         *      extend block allocation map
+         *      extend block allocation map
-         *      ---------------------------
+         *      ---------------------------
         *
         * extendfs() for new extension, retry after crash recovery;
         *
@@ -283,7 +283,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
         *  s_size: aggregate size in physical blocks;
         */
        /*
-         *      compute the new block allocation map configuration
+         *      compute the new block allocation map configuration
         *
         * map dinode:
         *  di_size: map file size in byte;
@@ -301,7 +301,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
        newNpages = BLKTODMAPN(t64) + 1;
        /*
-         *      extend map from current map (WITHOUT growing mapfile)
+         *      extend map from current map (WITHOUT growing mapfile)
         *
         * map new extension with unmapped part of the last partial
         * dmap page, if applicable, and extra page(s) allocated
@@ -341,8 +341,8 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
        XSize -= nblocks;
        /*
-         *      grow map file to cover remaining extension
+         *      grow map file to cover remaining extension
-         *      and/or one extra dmap page for next extendfs();
+         *      and/or one extra dmap page for next extendfs();
         *
         * allocate new map pages and its backing blocks, and
         * update map file xtree
@@ -422,8 +422,8 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
        dbFinalizeBmap(ipbmap);
        /*
-         *      update inode allocation map
+         *      update inode allocation map
-         *      ---------------------------
+         *      ---------------------------
         *
         * move iag lists from old to new iag;
         * agstart field is not updated for logredo() to reconstruct
@@ -442,8 +442,8 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
        }
        /*
-         *      finalize
+         *      finalize
-         *      --------
+         *      --------
         *
         * extension is committed when on-disk super block is
         * updated with new descriptors: logredo will recover
@@ -480,7 +480,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
        diFreeSpecial(ipbmap2);
        /*
-         *      update superblock
+         *      update superblock
         */
        if ((rc = readSuper(sb, &bh)))
                goto error_out;
@@ -530,7 +530,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
      resume:
        /*
-         *      resume file system transactions
+         *      resume file system transactions
         */
        txResume(sb);
diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c
index b753ba216450..b2375f0774b7 100644
--- a/fs/jfs/xattr.c
+++ b/fs/jfs/xattr.c
@@ -63,9 +63,9 @@
 *
 *   On-disk:
 *
- *     FEALISTs are stored on disk using blocks allocated by dbAlloc() and
+ *      FEALISTs are stored on disk using blocks allocated by dbAlloc() and
- *     written directly. An EA list may be in-lined in the inode if there is
+ *      written directly. An EA list may be in-lined in the inode if there is
- *     sufficient room available.
+ *      sufficient room available.
 */
 struct ea_buffer {
@@ -590,7 +590,8 @@ static int ea_get(struct inode *inode, struct ea_buffer *ea_buf, int min_size)
      size_check:
        if (EALIST_SIZE(ea_buf->xattr) != ea_size) {
                printk(KERN_ERR "ea_get: invalid extended attribute\n");
-                dump_mem("xattr", ea_buf->xattr, ea_size);
+                print_hex_dump(KERN_ERR, "", DUMP_PREFIX_ADDRESS, 16, 1,
+                                     ea_buf->xattr, ea_size, 1);
                ea_release(inode, ea_buf);
                rc = -EIO;
                goto clean_up;
diff --git a/fs/minix/file.c b/fs/minix/file.c
index f92baa1d7570..17765f697e50 100644
--- a/fs/minix/file.c
+++ b/fs/minix/file.c
@@ -23,7 +23,7 @@ const struct file_operations minix_file_operations = {
        .aio_write      = generic_file_aio_write,
        .mmap           = generic_file_mmap,
        .fsync          = minix_sync_file,
-        .sendfile       = generic_file_sendfile,
+        .splice_read    = generic_file_splice_read,
 };
 const struct inode_operations minix_file_inode_operations = {
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 9eb8eb4e4a08..8689b736fdd9 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -41,7 +41,9 @@ static int nfs_file_open(struct inode *, struct file *);
 static int nfs_file_release(struct inode *, struct file *);
 static loff_t nfs_file_llseek(struct file *file, loff_t offset, int origin);
 static int  nfs_file_mmap(struct file *, struct vm_area_struct *);
-static ssize_t nfs_file_sendfile(struct file *, loff_t *, size_t, read_actor_t, void *);
+static ssize_t nfs_file_splice_read(struct file *filp, loff_t *ppos,
+                                        struct pipe_inode_info *pipe,
+                                        size_t count, unsigned int flags);
 static ssize_t nfs_file_read(struct kiocb *, const struct iovec *iov,
                                unsigned long nr_segs, loff_t pos);
 static ssize_t nfs_file_write(struct kiocb *, const struct iovec *iov,
@@ -65,7 +67,7 @@ const struct file_operations nfs_file_operations = {
        .fsync          = nfs_fsync,
        .lock           = nfs_lock,
        .flock          = nfs_flock,
-        .sendfile       = nfs_file_sendfile,
+        .splice_read    = nfs_file_splice_read,
        .check_flags    = nfs_check_flags,
 };
@@ -224,20 +226,21 @@ nfs_file_read(struct kiocb *iocb, const struct iovec *iov,
 }
 static ssize_t
-nfs_file_sendfile(struct file *filp, loff_t *ppos, size_t count,
+nfs_file_splice_read(struct file *filp, loff_t *ppos,
-                read_actor_t actor, void *target)
+                     struct pipe_inode_info *pipe, size_t count,
+                     unsigned int flags)
 {
        struct dentry *dentry = filp->f_path.dentry;
        struct inode *inode = dentry->d_inode;
        ssize_t res;
-        dfprintk(VFS, "nfs: sendfile(%s/%s, %lu@%Lu)\n",
+        dfprintk(VFS, "nfs: splice_read(%s/%s, %lu@%Lu)\n",
                dentry->d_parent->d_name.name, dentry->d_name.name,
                (unsigned long) count, (unsigned long long) *ppos);
        res = nfs_revalidate_mapping(inode, filp->f_mapping);
        if (!res)
-                res = generic_file_sendfile(filp, ppos, count, actor, target);
+                res = generic_file_splice_read(filp, ppos, pipe, count, flags);
        return res;
 }
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 7e6aa245b5d5..8604e35bd48e 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -23,7 +23,7 @@
 #include <linux/file.h>
 #include <linux/mount.h>
 #include <linux/major.h>
-#include <linux/ext2_fs.h>
+#include <linux/splice.h>
 #include <linux/proc_fs.h>
 #include <linux/stat.h>
 #include <linux/fcntl.h>
@@ -801,26 +801,32 @@ found:
 }
 /*
- * Grab and keep cached pages assosiated with a file in the svc_rqst
+ * Grab and keep cached pages associated with a file in the svc_rqst
- * so that they can be passed to the netowork sendmsg/sendpage routines
+ * so that they can be passed to the network sendmsg/sendpage routines
- * directrly. They will be released after the sending has completed.
+ * directly. They will be released after the sending has completed.
 */
 static int
-nfsd_read_actor(read_descriptor_t *desc, struct page *page, unsigned long offset , unsigned long size)
+nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
+                  struct splice_desc *sd)
 {
-        unsigned long count = desc->count;
+        struct svc_rqst *rqstp = sd->u.data;
-        struct svc_rqst *rqstp = desc->arg.data;
        struct page **pp = rqstp->rq_respages + rqstp->rq_resused;
+        struct page *page = buf->page;
+        size_t size;
+        int ret;
+        ret = buf->ops->confirm(pipe, buf);
+        if (unlikely(ret))
+                return ret;
-        if (size > count)
+        size = sd->len;
-                size = count;
        if (rqstp->rq_res.page_len == 0) {
                get_page(page);
                put_page(*pp);
                *pp = page;
                rqstp->rq_resused++;
-                rqstp->rq_res.page_base = offset;
+                rqstp->rq_res.page_base = buf->offset;
                rqstp->rq_res.page_len = size;
        } else if (page != pp[-1]) {
                get_page(page);
@@ -832,11 +838,15 @@ nfsd_read_actor(read_descriptor_t *desc, struct page *page, unsigned long offset
        } else
                rqstp->rq_res.page_len += size;
-        desc->count = count - size;
-        desc->written += size;
        return size;
 }
+static int nfsd_direct_splice_actor(struct pipe_inode_info *pipe,
+                                    struct splice_desc *sd)
+{
+        return __splice_from_pipe(pipe, sd, nfsd_splice_actor);
+}
 static __be32
 nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
              loff_t offset, struct kvec *vec, int vlen, unsigned long *count)
@@ -861,10 +871,15 @@ nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
        if (ra && ra->p_set)
                file->f_ra = ra->p_ra;
-        if (file->f_op->sendfile && rqstp->rq_sendfile_ok) {
+        if (file->f_op->splice_read && rqstp->rq_splice_ok) {
-                rqstp->rq_resused = 1;
+                struct splice_desc sd = {
-                host_err = file->f_op->sendfile(file, &offset, *count,
+                        .len            = 0,
-                                                 nfsd_read_actor, rqstp);
+                        .total_len      = *count,
+                        .pos            = offset,
+                        .u.data         = rqstp,
+                };
+                host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor);
        } else {
                oldfs = get_fs();
                set_fs(KERNEL_DS);
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 7ed56390b582..ffcc504a1667 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -2276,7 +2276,7 @@ const struct file_operations ntfs_file_ops = {
                                                    mounted filesystem. */
        .mmap           = generic_file_mmap,     /* Mmap file. */
        .open           = ntfs_file_open,        /* Open file. */
-        .sendfile       = generic_file_sendfile, /* Zero-copy data send with
+        .splice_read    = generic_file_splice_read /* Zero-copy data send with
                                                    the data source being on
                                                    the ntfs partition.  We do
                                                    not need to care about the
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index ac6c96431bbc..4979b6675717 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -31,7 +31,7 @@
 #include <linux/pagemap.h>
 #include <linux/uio.h>
 #include <linux/sched.h>
-#include <linux/pipe_fs_i.h>
+#include <linux/splice.h>
 #include <linux/mount.h>
 #include <linux/writeback.h>
@@ -1583,7 +1583,7 @@ static int ocfs2_splice_write_actor(struct pipe_inode_info *pipe,
        ssize_t copied = 0;
        struct ocfs2_splice_write_priv sp;
-        ret = buf->ops->pin(pipe, buf);
+        ret = buf->ops->confirm(pipe, buf);
        if (ret)
                goto out;
@@ -1604,7 +1604,7 @@ static int ocfs2_splice_write_actor(struct pipe_inode_info *pipe,
                 * might enter ocfs2_buffered_write_cluster() more
                 * than once, so keep track of our progress here.
                 */
-                copied = ocfs2_buffered_write_cluster(sd->file,
+                copied = ocfs2_buffered_write_cluster(sd->u.file,
                                                      (loff_t)sd->pos + total,
                                                      count,
                                                      ocfs2_map_and_write_splice_data,
@@ -1636,9 +1636,14 @@ static ssize_t __ocfs2_file_splice_write(struct pipe_inode_info *pipe,
        int ret, err;
        struct address_space *mapping = out->f_mapping;
        struct inode *inode = mapping->host;
+        struct splice_desc sd = {
-        ret = __splice_from_pipe(pipe, out, ppos, len, flags,
+                .total_len = len,
-                                 ocfs2_splice_write_actor);
+                .flags = flags,
+                .pos = *ppos,
+                .u.file = out,
+        };
+        ret = __splice_from_pipe(pipe, &sd, ocfs2_splice_write_actor);
        if (ret > 0) {
                *ppos += ret;
@@ -1817,7 +1822,6 @@ const struct inode_operations ocfs2_special_file_iops = {
 const struct file_operations ocfs2_fops = {
        .read           = do_sync_read,
        .write          = do_sync_write,
-        .sendfile       = generic_file_sendfile,
        .mmap           = ocfs2_mmap,
        .fsync          = ocfs2_sync_file,
        .release        = ocfs2_file_release,
diff --git a/fs/partitions/ibm.c b/fs/partitions/ibm.c
index 9f7ad4244f63..1e064c4a4f86 100644
--- a/fs/partitions/ibm.c
+++ b/fs/partitions/ibm.c
@@ -45,7 +45,7 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev)
 {
        int blocksize, offset, size,res;
        loff_t i_size;
-        dasd_information_t *info;
+        dasd_information2_t *info;
        struct hd_geometry *geo;
        char type[5] = {0,};
        char name[7] = {0,};
@@ -64,14 +64,17 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev)
        if (i_size == 0)
                goto out_exit;
-        if ((info = kmalloc(sizeof(dasd_information_t), GFP_KERNEL)) == NULL)
+        info = kmalloc(sizeof(dasd_information2_t), GFP_KERNEL);
+        if (info == NULL)
                goto out_exit;
-        if ((geo = kmalloc(sizeof(struct hd_geometry), GFP_KERNEL)) == NULL)
+        geo = kmalloc(sizeof(struct hd_geometry), GFP_KERNEL);
+        if (geo == NULL)
                goto out_nogeo;
-        if ((label = kmalloc(sizeof(union label_t), GFP_KERNEL)) == NULL)
+        label = kmalloc(sizeof(union label_t), GFP_KERNEL);
+        if (label == NULL)
                goto out_nolab;
-        if (ioctl_by_bdev(bdev, BIODASDINFO, (unsigned long)info) != 0 ||
+        if (ioctl_by_bdev(bdev, BIODASDINFO2, (unsigned long)info) != 0 ||
            ioctl_by_bdev(bdev, HDIO_GETGEO, (unsigned long)geo) != 0)
                goto out_freeall;
@@ -96,84 +99,108 @@ ibm_partition(struct parsed_partitions *state, struct block_device *bdev)
        res = 1;
        /*
-         * Three different types: CMS1, VOL1 and LNX1/unlabeled
+         * Three different formats: LDL, CDL and unformatted disk
+         *
+         * identified by info->format
+         *
+         * unformatted disks we do not have to care about
         */
-        if (strncmp(type, "CMS1", 4) == 0) {
+        if (info->format == DASD_FORMAT_LDL) {
-                /*
+                if (strncmp(type, "CMS1", 4) == 0) {
-                 * VM style CMS1 labeled disk
+                        /*
-                 */
+                         * VM style CMS1 labeled disk
-                if (label->cms.disk_offset != 0) {
+                         */
-                        printk("CMS1/%8s(MDSK):", name);
+                        if (label->cms.disk_offset != 0) {
-                        /* disk is reserved minidisk */
+                                printk("CMS1/%8s(MDSK):", name);
-                        blocksize = label->cms.block_size;
+                                /* disk is reserved minidisk */
-                        offset = label->cms.disk_offset;
+                                blocksize = label->cms.block_size;
-                        size = (label->cms.block_count - 1) * (blocksize >> 9);
+                                offset = label->cms.disk_offset;
+                                size = (label->cms.block_count - 1)
+                                        * (blocksize >> 9);
+                        } else {
+                                printk("CMS1/%8s:", name);
+                                offset = (info->label_block + 1);
+                                size = i_size >> 9;
+                        }
                } else {
-                        printk("CMS1/%8s:", name);
+                        /*
+                         * Old style LNX1 or unlabeled disk
+                         */
+                        if (strncmp(type, "LNX1", 4) == 0)
+                                printk ("LNX1/%8s:", name);
+                        else
+                                printk("(nonl)");
                        offset = (info->label_block + 1);
                        size = i_size >> 9;
                }
                put_partition(state, 1, offset*(blocksize >> 9),
-                                 size-offset*(blocksize >> 9));
+                                      size-offset*(blocksize >> 9));
-        } else if ((strncmp(type, "VOL1", 4) == 0) &&
+        } else if (info->format == DASD_FORMAT_CDL) {
-                (!info->FBA_layout) && (!strcmp(info->type, "ECKD"))) {
                /*
-                 * New style VOL1 labeled disk
+                 * New style CDL formatted disk
                 */
                unsigned int blk;
                int counter;
-                printk("VOL1/%8s:", name);
-                /* get block number and read then go through format1 labels */
-                blk = cchhb2blk(&label->vol.vtoc, geo) + 1;
-                counter = 0;
-                while ((data = read_dev_sector(bdev, blk*(blocksize/512),
-                                               &sect)) != NULL) {
-                        struct vtoc_format1_label f1;
-                        memcpy(&f1, data, sizeof(struct vtoc_format1_label));
-                        put_dev_sector(sect);
-                        /* skip FMT4 / FMT5 / FMT7 labels */
-                        if (f1.DS1FMTID == _ascebc['4']
-                            || f1.DS1FMTID == _ascebc['5']
-                            || f1.DS1FMTID == _ascebc['7']) {
-                                blk++;
-                                continue;
-                        }
-                        /* only FMT1 valid at this point */
-                        if (f1.DS1FMTID != _ascebc['1'])
-                                break;
-                        /* OK, we got valid partition data */
-                        offset = cchh2blk(&f1.DS1EXT1.llimit, geo);
-                        size  = cchh2blk(&f1.DS1EXT1.ulimit, geo) -
-                                offset + geo->sectors;
-                        if (counter >= state->limit)
-                                break;
-                        put_partition(state, counter + 1,
-                                      offset * (blocksize >> 9),
-                                      size * (blocksize >> 9));
-                        counter++;
-                        blk++;
-                }
-                if (!data)
-                /* Are we not supposed to report this ? */
-                        goto out_readerr;
-        } else {
                /*
-                 * Old style LNX1 or unlabeled disk
+                 * check if VOL1 label is available
+                 * if not, something is wrong, skipping partition detection
                 */
-                if (strncmp(type, "LNX1", 4) == 0)
+                if (strncmp(type, "VOL1",  4) == 0) {
-                        printk ("LNX1/%8s:", name);
+                        printk("VOL1/%8s:", name);
-                else
+                        /*
-                        printk("(nonl)/%8s:", name);
+                         * get block number and read then go through format1
-                offset = (info->label_block + 1);
+                         * labels
-                size = i_size >> 9;
+                         */
-                put_partition(state, 1, offset*(blocksize >> 9),
+                        blk = cchhb2blk(&label->vol.vtoc, geo) + 1;
-                              size-offset*(blocksize >> 9));
+                        counter = 0;
+                        data = read_dev_sector(bdev, blk * (blocksize/512),
+                                               &sect);
+                        while (data != NULL) {
+                                struct vtoc_format1_label f1;
+                                memcpy(&f1, data,
+                                       sizeof(struct vtoc_format1_label));
+                                put_dev_sector(sect);
+                                /* skip FMT4 / FMT5 / FMT7 labels */
+                                if (f1.DS1FMTID == _ascebc['4']
+                                    || f1.DS1FMTID == _ascebc['5']
+                                    || f1.DS1FMTID == _ascebc['7']) {
+                                        blk++;
+                                        data = read_dev_sector(bdev, blk *
+                                                               (blocksize/512),
+                                                                &sect);
+                                        continue;
+                                }
+                                /* only FMT1 valid at this point */
+                                if (f1.DS1FMTID != _ascebc['1'])
+                                        break;
+                                /* OK, we got valid partition data */
+                                offset = cchh2blk(&f1.DS1EXT1.llimit, geo);
+                                size  = cchh2blk(&f1.DS1EXT1.ulimit, geo) -
+                                        offset + geo->sectors;
+                                if (counter >= state->limit)
+                                        break;
+                                put_partition(state, counter + 1,
+                                              offset * (blocksize >> 9),
+                                              size * (blocksize >> 9));
+                                counter++;
+                                blk++;
+                                data = read_dev_sector(bdev,
+                                                       blk * (blocksize/512),
+                                                       &sect);
+                        }
+                        if (!data)
+                                /* Are we not supposed to report this ? */
+                                goto out_readerr;
+                } else
+                        printk(KERN_WARNING "Warning, expected Label VOL1 not "
+                               "found, treating as CDL formated Disk");
        }
        printk("\n");
diff --git a/fs/pipe.c b/fs/pipe.c
index 3a89592bdf57..d007830d9c87 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -164,6 +164,20 @@ static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
                page_cache_release(page);
 }
+/**
+ * generic_pipe_buf_map - virtually map a pipe buffer
+ * @pipe:       the pipe that the buffer belongs to
+ * @buf:        the buffer that should be mapped
+ * @atomic:     whether to use an atomic map
+ *
+ * Description:
+ *      This function returns a kernel virtual address mapping for the
+ *      pipe_buffer passed in @buf. If @atomic is set, an atomic map is provided
+ *      and the caller has to be careful not to fault before calling
+ *      the unmap function.
+ *
+ *      Note that this function occupies KM_USER0 if @atomic != 0.
+ */
 void *generic_pipe_buf_map(struct pipe_inode_info *pipe,
                           struct pipe_buffer *buf, int atomic)
 {
@@ -175,6 +189,15 @@ void *generic_pipe_buf_map(struct pipe_inode_info *pipe,
        return kmap(buf->page);
 }
+/**
+ * generic_pipe_buf_unmap - unmap a previously mapped pipe buffer
+ * @pipe:       the pipe that the buffer belongs to
+ * @buf:        the buffer that should be unmapped
+ * @map_data:   the data that the mapping function returned
+ *
+ * Description:
+ *      This function undoes the mapping that ->map() provided.
+ */
 void generic_pipe_buf_unmap(struct pipe_inode_info *pipe,
                            struct pipe_buffer *buf, void *map_data)
 {
@@ -185,11 +208,28 @@ void generic_pipe_buf_unmap(struct pipe_inode_info *pipe,
                kunmap(buf->page);
 }
+/**
+ * generic_pipe_buf_steal - attempt to take ownership of a @pipe_buffer
+ * @pipe:       the pipe that the buffer belongs to
+ * @buf:        the buffer to attempt to steal
+ *
+ * Description:
+ *      This function attempts to steal the @struct page attached to
+ *      @buf. If successful, this function returns 0 and returns with
+ *      the page locked. The caller may then reuse the page for whatever
+ *      he wishes, the typical use is insertion into a different file
+ *      page cache.
+ */
 int generic_pipe_buf_steal(struct pipe_inode_info *pipe,
                           struct pipe_buffer *buf)
 {
        struct page *page = buf->page;
+        /*
+         * A reference of one is golden, that means that the owner of this
+         * page is the only one holding a reference to it. lock the page
+         * and return OK.
+         */
        if (page_count(page) == 1) {
                lock_page(page);
                return 0;
@@ -198,12 +238,32 @@ int generic_pipe_buf_steal(struct pipe_inode_info *pipe,
        return 1;
 }
-void generic_pipe_buf_get(struct pipe_inode_info *info, struct pipe_buffer *buf)
+/**
+ * generic_pipe_buf_get - get a reference to a @struct pipe_buffer
+ * @pipe:       the pipe that the buffer belongs to
+ * @buf:        the buffer to get a reference to
+ *
+ * Description:
+ *      This function grabs an extra reference to @buf. It's used in
+ *      the tee() system call, when we duplicate the buffers in one
+ *      pipe into another.
+ */
+void generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
 {
        page_cache_get(buf->page);
 }
-int generic_pipe_buf_pin(struct pipe_inode_info *info, struct pipe_buffer *buf)
+/**
+ * generic_pipe_buf_confirm - verify contents of the pipe buffer
+ * @info:       the pipe that the buffer belongs to
+ * @buf:        the buffer to confirm
+ *
+ * Description:
+ *      This function does nothing, because the generic pipe code uses
+ *      pages that are always good when inserted into the pipe.
+ */
+int generic_pipe_buf_confirm(struct pipe_inode_info *info,
+                             struct pipe_buffer *buf)
 {
        return 0;
 }
@@ -212,7 +272,7 @@ static const struct pipe_buf_operations anon_pipe_buf_ops = {
        .can_merge = 1,
        .map = generic_pipe_buf_map,
        .unmap = generic_pipe_buf_unmap,
-        .pin = generic_pipe_buf_pin,
+        .confirm = generic_pipe_buf_confirm,
        .release = anon_pipe_buf_release,
        .steal = generic_pipe_buf_steal,
        .get = generic_pipe_buf_get,
@@ -252,7 +312,7 @@ pipe_read(struct kiocb *iocb, const struct iovec *_iov,
                        if (chars > total_len)
                                chars = total_len;
-                        error = ops->pin(pipe, buf);
+                        error = ops->confirm(pipe, buf);
                        if (error) {
                                if (!ret)
                                        error = ret;
@@ -373,7 +433,7 @@ pipe_write(struct kiocb *iocb, const struct iovec *_iov,
                        int error, atomic = 1;
                        void *addr;
-                        error = ops->pin(pipe, buf);
+                        error = ops->confirm(pipe, buf);
                        if (error)
                                goto out;
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 74f30e0c0381..98e78e2f18d6 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -165,7 +165,6 @@ static inline char * task_state(struct task_struct *p, char *buffer)
        rcu_read_lock();
        buffer += sprintf(buffer,
                "State:\t%s\n"
-                "SleepAVG:\t%lu%%\n"
                "Tgid:\t%d\n"
                "Pid:\t%d\n"
                "PPid:\t%d\n"
@@ -173,7 +172,6 @@ static inline char * task_state(struct task_struct *p, char *buffer)
                "Uid:\t%d\t%d\t%d\t%d\n"
                "Gid:\t%d\t%d\t%d\t%d\n",
                get_task_state(p),
-                (p->sleep_avg/1024)*100/(1020000000/1024),
                p->tgid, p->pid,
                pid_alive(p) ? rcu_dereference(p->real_parent)->tgid : 0,
                pid_alive(p) && p->ptrace ? rcu_dereference(p->parent)->pid : 0,
@@ -312,6 +310,41 @@ int proc_pid_status(struct task_struct *task, char * buffer)
        return buffer - orig;
 }
+static clock_t task_utime(struct task_struct *p)
+{
+        clock_t utime = cputime_to_clock_t(p->utime),
+                total = utime + cputime_to_clock_t(p->stime);
+        u64 temp;
+        /*
+         * Use CFS's precise accounting:
+         */
+        temp = (u64)nsec_to_clock_t(p->se.sum_exec_runtime);
+        if (total) {
+                temp *= utime;
+                do_div(temp, total);
+        }
+        utime = (clock_t)temp;
+        return utime;
+}
+static clock_t task_stime(struct task_struct *p)
+{
+        clock_t stime = cputime_to_clock_t(p->stime);
+        /*
+         * Use CFS's precise accounting. (we subtract utime from
+         * the total, to make sure the total observed by userspace
+         * grows monotonically - apps rely on that):
+         */
+        stime = nsec_to_clock_t(p->se.sum_exec_runtime) - task_utime(p);
+        return stime;
+}
 static int do_task_stat(struct task_struct *task, char * buffer, int whole)
 {
        unsigned long vsize, eip, esp, wchan = ~0UL;
@@ -326,7 +359,8 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
        unsigned long long start_time;
        unsigned long cmin_flt = 0, cmaj_flt = 0;
        unsigned long  min_flt = 0,  maj_flt = 0;
-        cputime_t cutime, cstime, utime, stime;
+        cputime_t cutime, cstime;
+        clock_t utime, stime;
        unsigned long rsslim = 0;
        char tcomm[sizeof(task->comm)];
        unsigned long flags;
@@ -344,7 +378,8 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
        sigemptyset(&sigign);
        sigemptyset(&sigcatch);
-        cutime = cstime = utime = stime = cputime_zero;
+        cutime = cstime = cputime_zero;
+        utime = stime = 0;
        rcu_read_lock();
        if (lock_task_sighand(task, &flags)) {
@@ -370,15 +405,15 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
                        do {
                                min_flt += t->min_flt;
                                maj_flt += t->maj_flt;
-                                utime = cputime_add(utime, t->utime);
+                                utime += task_utime(t);
-                                stime = cputime_add(stime, t->stime);
+                                stime += task_stime(t);
                                t = next_thread(t);
                        } while (t != task);
                        min_flt += sig->min_flt;
                        maj_flt += sig->maj_flt;
-                        utime = cputime_add(utime, sig->utime);
+                        utime += cputime_to_clock_t(sig->utime);
-                        stime = cputime_add(stime, sig->stime);
+                        stime += cputime_to_clock_t(sig->stime);
                }
                sid = signal_session(sig);
@@ -394,8 +429,8 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
        if (!whole) {
                min_flt = task->min_flt;
                maj_flt = task->maj_flt;
-                utime = task->utime;
+                utime = task_utime(task);
-                stime = task->stime;
+                stime = task_stime(task);
        }
        /* scale priority and nice values from timeslices to -20..20 */
@@ -426,8 +461,8 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
                cmin_flt,
                maj_flt,
                cmaj_flt,
-                cputime_to_clock_t(utime),
+                utime,
-                cputime_to_clock_t(stime),
+                stime,
                cputime_to_clock_t(cutime),
                cputime_to_clock_t(cstime),
                priority,
diff --git a/fs/proc/base.c b/fs/proc/base.c
index a5fa1fdafc4e..46ea5d56e1bb 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -296,7 +296,7 @@ static int proc_pid_wchan(struct task_struct *task, char *buffer)
 */
 static int proc_pid_schedstat(struct task_struct *task, char *buffer)
 {
-        return sprintf(buffer, "%lu %lu %lu\n",
+        return sprintf(buffer, "%llu %llu %lu\n",
                        task->sched_info.cpu_time,
                        task->sched_info.run_delay,
                        task->sched_info.pcnt);
@@ -929,6 +929,69 @@ static const struct file_operations proc_fault_inject_operations = {
 };
 #endif
+#ifdef CONFIG_SCHED_DEBUG
+/*
+ * Print out various scheduling related per-task fields:
+ */
+static int sched_show(struct seq_file *m, void *v)
+{
+        struct inode *inode = m->private;
+        struct task_struct *p;
+        WARN_ON(!inode);
+        p = get_proc_task(inode);
+        if (!p)
+                return -ESRCH;
+        proc_sched_show_task(p, m);
+        put_task_struct(p);
+        return 0;
+}
+static ssize_t
+sched_write(struct file *file, const char __user *buf,
+            size_t count, loff_t *offset)
+{
+        struct inode *inode = file->f_path.dentry->d_inode;
+        struct task_struct *p;
+        WARN_ON(!inode);
+        p = get_proc_task(inode);
+        if (!p)
+                return -ESRCH;
+        proc_sched_set_task(p);
+        put_task_struct(p);
+        return count;
+}
+static int sched_open(struct inode *inode, struct file *filp)
+{
+        int ret;
+        ret = single_open(filp, sched_show, NULL);
+        if (!ret) {
+                struct seq_file *m = filp->private_data;
+                m->private = inode;
+        }
+        return ret;
+}
+static const struct file_operations proc_pid_sched_operations = {
+        .open           = sched_open,
+        .read           = seq_read,
+        .write          = sched_write,
+        .llseek         = seq_lseek,
+        .release        = seq_release,
+};
+#endif
 static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
        struct inode *inode = dentry->d_inode;
@@ -1963,6 +2026,9 @@ static const struct pid_entry tgid_base_stuff[] = {
        INF("environ",    S_IRUSR, pid_environ),
        INF("auxv",       S_IRUSR, pid_auxv),
        INF("status",     S_IRUGO, pid_status),
+#ifdef CONFIG_SCHED_DEBUG
+        REG("sched",      S_IRUGO|S_IWUSR, pid_sched),
+#endif
        INF("cmdline",    S_IRUGO, pid_cmdline),
        INF("stat",       S_IRUGO, tgid_stat),
        INF("statm",      S_IRUGO, pid_statm),
@@ -2247,6 +2313,9 @@ static const struct pid_entry tid_base_stuff[] = {
        INF("environ",   S_IRUSR, pid_environ),
        INF("auxv",      S_IRUSR, pid_auxv),
        INF("status",    S_IRUGO, pid_status),
+#ifdef CONFIG_SCHED_DEBUG
+        REG("sched",     S_IRUGO|S_IWUSR, pid_sched),
+#endif
        INF("cmdline",   S_IRUGO, pid_cmdline),
        INF("stat",      S_IRUGO, tid_stat),
        INF("statm",     S_IRUGO, pid_statm),
diff --git a/fs/qnx4/file.c b/fs/qnx4/file.c
index 44649981bbc8..867f42b02035 100644
--- a/fs/qnx4/file.c
+++ b/fs/qnx4/file.c
@@ -25,7 +25,7 @@ const struct file_operations qnx4_file_operations =
        .read           = do_sync_read,
        .aio_read       = generic_file_aio_read,
        .mmap           = generic_file_mmap,
-        .sendfile       = generic_file_sendfile,
+        .splice_read    = generic_file_splice_read,
 #ifdef CONFIG_QNX4FS_RW
        .write          = do_sync_write,
        .aio_write      = generic_file_aio_write,
diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c
index 2f14774a124f..97bdc0b2f9d2 100644
--- a/fs/ramfs/file-mmu.c
+++ b/fs/ramfs/file-mmu.c
@@ -41,7 +41,7 @@ const struct file_operations ramfs_file_operations = {
        .aio_write      = generic_file_aio_write,
        .mmap           = generic_file_mmap,
        .fsync          = simple_sync_file,
-        .sendfile       = generic_file_sendfile,
+        .splice_read    = generic_file_splice_read,
        .llseek         = generic_file_llseek,
 };
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 5d258c40a2fd..cad2b7ace630 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -42,7 +42,7 @@ const struct file_operations ramfs_file_operations = {
        .write                  = do_sync_write,
        .aio_write              = generic_file_aio_write,
        .fsync                  = simple_sync_file,
-        .sendfile               = generic_file_sendfile,
+        .splice_read            = generic_file_splice_read,
        .llseek                 = generic_file_llseek,
 };
diff --git a/fs/read_write.c b/fs/read_write.c
index 4d03008f015b..507ddff48a9a 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -15,6 +15,7 @@
 #include <linux/module.h>
 #include <linux/syscalls.h>
 #include <linux/pagemap.h>
+#include <linux/splice.h>
 #include "read_write.h"
 #include <asm/uaccess.h>
@@ -25,7 +26,7 @@ const struct file_operations generic_ro_fops = {
        .read           = do_sync_read,
        .aio_read       = generic_file_aio_read,
        .mmap           = generic_file_readonly_mmap,
-        .sendfile       = generic_file_sendfile,
+        .splice_read    = generic_file_splice_read,
 };
 EXPORT_SYMBOL(generic_ro_fops);
@@ -708,7 +709,7 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
        struct inode * in_inode, * out_inode;
        loff_t pos;
        ssize_t retval;
-        int fput_needed_in, fput_needed_out;
+        int fput_needed_in, fput_needed_out, fl;
        /*
         * Get input file, and verify that it is ok..
@@ -723,7 +724,7 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
        in_inode = in_file->f_path.dentry->d_inode;
        if (!in_inode)
                goto fput_in;
-        if (!in_file->f_op || !in_file->f_op->sendfile)
+        if (!in_file->f_op || !in_file->f_op->splice_read)
                goto fput_in;
        retval = -ESPIPE;
        if (!ppos)
@@ -776,7 +777,18 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
                count = max - pos;
        }
-        retval = in_file->f_op->sendfile(in_file, ppos, count, file_send_actor, out_file);
+        fl = 0;
+#if 0
+        /*
+         * We need to debate whether we can enable this or not. The
+         * man page documents EAGAIN return for the output at least,
+         * and the application is arguably buggy if it doesn't expect
+         * EAGAIN on a non-blocking file descriptor.
+         */
+        if (in_file->f_flags & O_NONBLOCK)
+                fl = SPLICE_F_NONBLOCK;
+#endif
+        retval = do_splice_direct(in_file, ppos, out_file, count, fl);
        if (retval > 0) {
                add_rchar(current, retval);
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 9e451a68580f..30eebfb1b2d8 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -1531,7 +1531,6 @@ const struct file_operations reiserfs_file_operations = {
        .open = generic_file_open,
        .release = reiserfs_file_release,
        .fsync = reiserfs_sync_file,
-        .sendfile = generic_file_sendfile,
        .aio_read = generic_file_aio_read,
        .aio_write = generic_file_aio_write,
        .splice_read = generic_file_splice_read,
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 0ac22af7afe5..49194a4e6b91 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -447,3 +447,37 @@ int seq_puts(struct seq_file *m, const char *s)
        return -1;
 }
 EXPORT_SYMBOL(seq_puts);
+struct list_head *seq_list_start(struct list_head *head, loff_t pos)
+{
+        struct list_head *lh;
+        list_for_each(lh, head)
+                if (pos-- == 0)
+                        return lh;
+        return NULL;
+}
+EXPORT_SYMBOL(seq_list_start);
+struct list_head *seq_list_start_head(struct list_head *head, loff_t pos)
+{
+        if (!pos)
+                return head;
+        return seq_list_start(head, pos - 1);
+}
+EXPORT_SYMBOL(seq_list_start_head);
+struct list_head *seq_list_next(void *v, struct list_head *head, loff_t *ppos)
+{
+        struct list_head *lh;
+        lh = ((struct list_head *)v)->next;
+        ++*ppos;
+        return lh == head ? NULL : lh;
+}
+EXPORT_SYMBOL(seq_list_next);
diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c
index aea3f8aa54c0..c5d78a7e492b 100644
--- a/fs/smbfs/file.c
+++ b/fs/smbfs/file.c
@@ -262,8 +262,9 @@ out:
 }
 static ssize_t
-smb_file_sendfile(struct file *file, loff_t *ppos,
+smb_file_splice_read(struct file *file, loff_t *ppos,
-                  size_t count, read_actor_t actor, void *target)
+                     struct pipe_inode_info *pipe, size_t count,
+                     unsigned int flags)
 {
        struct dentry *dentry = file->f_path.dentry;
        ssize_t status;
@@ -277,7 +278,7 @@ smb_file_sendfile(struct file *file, loff_t *ppos,
                         DENTRY_PATH(dentry), status);
                goto out;
        }
-        status = generic_file_sendfile(file, ppos, count, actor, target);
+        status = generic_file_splice_read(file, ppos, pipe, count, flags);
 out:
        return status;
 }
@@ -416,7 +417,7 @@ const struct file_operations smb_file_operations =
        .open           = smb_file_open,
        .release        = smb_file_release,
        .fsync          = smb_fsync,
-        .sendfile       = smb_file_sendfile,
+        .splice_read    = smb_file_splice_read,
 };
 const struct inode_operations smb_file_inode_operations =
diff --git a/fs/splice.c b/fs/splice.c
index e7d7080de2f9..ed2ce995475c 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -20,7 +20,7 @@
 #include <linux/fs.h>
 #include <linux/file.h>
 #include <linux/pagemap.h>
-#include <linux/pipe_fs_i.h>
+#include <linux/splice.h>
 #include <linux/mm_inline.h>
 #include <linux/swap.h>
 #include <linux/writeback.h>
@@ -29,22 +29,6 @@
 #include <linux/syscalls.h>
 #include <linux/uio.h>
-struct partial_page {
-        unsigned int offset;
-        unsigned int len;
-};
-/*
- * Passed to splice_to_pipe
- */
-struct splice_pipe_desc {
-        struct page **pages;            /* page map */
-        struct partial_page *partial;   /* pages[] may not be contig */
-        int nr_pages;                   /* number of pages in map */
-        unsigned int flags;             /* splice flags */
-        const struct pipe_buf_operations *ops;/* ops associated with output pipe */
-};
 /*
 * Attempt to steal a page from a pipe buffer. This should perhaps go into
 * a vm helper function, it's already simplified quite a bit by the
@@ -101,8 +85,12 @@ static void page_cache_pipe_buf_release(struct pipe_inode_info *pipe,
        buf->flags &= ~PIPE_BUF_FLAG_LRU;
 }
-static int page_cache_pipe_buf_pin(struct pipe_inode_info *pipe,
+/*
-                                   struct pipe_buffer *buf)
+ * Check whether the contents of buf are OK to access. Since the content
+ * is a page cache page, IO may be in flight.
+ */
+static int page_cache_pipe_buf_confirm(struct pipe_inode_info *pipe,
+                                       struct pipe_buffer *buf)
 {
        struct page *page = buf->page;
        int err;
@@ -143,7 +131,7 @@ static const struct pipe_buf_operations page_cache_pipe_buf_ops = {
        .can_merge = 0,
        .map = generic_pipe_buf_map,
        .unmap = generic_pipe_buf_unmap,
-        .pin = page_cache_pipe_buf_pin,
+        .confirm = page_cache_pipe_buf_confirm,
        .release = page_cache_pipe_buf_release,
        .steal = page_cache_pipe_buf_steal,
        .get = generic_pipe_buf_get,
@@ -163,18 +151,25 @@ static const struct pipe_buf_operations user_page_pipe_buf_ops = {
        .can_merge = 0,
        .map = generic_pipe_buf_map,
        .unmap = generic_pipe_buf_unmap,
-        .pin = generic_pipe_buf_pin,
+        .confirm = generic_pipe_buf_confirm,
        .release = page_cache_pipe_buf_release,
        .steal = user_page_pipe_buf_steal,
        .get = generic_pipe_buf_get,
 };
-/*
+/**
- * Pipe output worker. This sets up our pipe format with the page cache
+ * splice_to_pipe - fill passed data into a pipe
- * pipe buffer operations. Otherwise very similar to the regular pipe_writev().
+ * @pipe:       pipe to fill
+ * @spd:        data to fill
+ *
+ * Description:
+ *    @spd contains a map of pages and len/offset tuples, along with
+ *    the struct pipe_buf_operations associated with these pages. This
+ *    function will link that data to the pipe.
+ *
 */
-static ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
+ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
-                              struct splice_pipe_desc *spd)
+                       struct splice_pipe_desc *spd)
 {
        unsigned int spd_pages = spd->nr_pages;
        int ret, do_wakeup, page_nr;
@@ -201,6 +196,7 @@ static ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
                        buf->page = spd->pages[page_nr];
                        buf->offset = spd->partial[page_nr].offset;
                        buf->len = spd->partial[page_nr].len;
+                        buf->private = spd->partial[page_nr].private;
                        buf->ops = spd->ops;
                        if (spd->flags & SPLICE_F_GIFT)
                                buf->flags |= PIPE_BUF_FLAG_GIFT;
@@ -296,19 +292,15 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
        page_cache_readahead(mapping, &in->f_ra, in, index, nr_pages);
        /*
-         * Now fill in the holes:
-         */
-        error = 0;
-        /*
         * Lookup the (hopefully) full range of pages we need.
         */
        spd.nr_pages = find_get_pages_contig(mapping, index, nr_pages, pages);
        /*
         * If find_get_pages_contig() returned fewer pages than we needed,
-         * allocate the rest.
+         * allocate the rest and fill in the holes.
         */
+        error = 0;
        index += spd.nr_pages;
        while (spd.nr_pages < nr_pages) {
                /*
@@ -470,11 +462,16 @@ fill_it:
 /**
 * generic_file_splice_read - splice data from file to a pipe
 * @in:         file to splice from
+ * @ppos:       position in @in
 * @pipe:       pipe to splice to
 * @len:        number of bytes to splice
 * @flags:      splice modifier flags
 *
- * Will read pages from given file and fill them into a pipe.
+ * Description:
+ *    Will read pages from given file and fill them into a pipe. Can be
+ *    used as long as the address_space operations for the source implement
+ *    a readpage() hook.
+ *
 */
 ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
                                 struct pipe_inode_info *pipe, size_t len,
@@ -528,11 +525,11 @@ EXPORT_SYMBOL(generic_file_splice_read);
 static int pipe_to_sendpage(struct pipe_inode_info *pipe,
                            struct pipe_buffer *buf, struct splice_desc *sd)
 {
-        struct file *file = sd->file;
+        struct file *file = sd->u.file;
        loff_t pos = sd->pos;
        int ret, more;
-        ret = buf->ops->pin(pipe, buf);
+        ret = buf->ops->confirm(pipe, buf);
        if (!ret) {
                more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len;
@@ -566,7 +563,7 @@ static int pipe_to_sendpage(struct pipe_inode_info *pipe,
 static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
                        struct splice_desc *sd)
 {
-        struct file *file = sd->file;
+        struct file *file = sd->u.file;
        struct address_space *mapping = file->f_mapping;
        unsigned int offset, this_len;
        struct page *page;
@@ -576,7 +573,7 @@ static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
        /*
         * make sure the data in this buffer is uptodate
         */
-        ret = buf->ops->pin(pipe, buf);
+        ret = buf->ops->confirm(pipe, buf);
        if (unlikely(ret))
                return ret;
@@ -663,36 +660,37 @@ out_ret:
        return ret;
 }
-/*
+/**
- * Pipe input worker. Most of this logic works like a regular pipe, the
+ * __splice_from_pipe - splice data from a pipe to given actor
- * key here is the 'actor' worker passed in that actually moves the data
+ * @pipe:       pipe to splice from
- * to the wanted destination. See pipe_to_file/pipe_to_sendpage above.
+ * @sd:         information to @actor
+ * @actor:      handler that splices the data
+ *
+ * Description:
+ *    This function does little more than loop over the pipe and call
+ *    @actor to do the actual moving of a single struct pipe_buffer to
+ *    the desired destination. See pipe_to_file, pipe_to_sendpage, or
+ *    pipe_to_user.
+ *
 */
-ssize_t __splice_from_pipe(struct pipe_inode_info *pipe,
+ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, struct splice_desc *sd,
-                           struct file *out, loff_t *ppos, size_t len,
+                           splice_actor *actor)
-                           unsigned int flags, splice_actor *actor)
 {
        int ret, do_wakeup, err;
-        struct splice_desc sd;
        ret = 0;
        do_wakeup = 0;
-        sd.total_len = len;
-        sd.flags = flags;
-        sd.file = out;
-        sd.pos = *ppos;
        for (;;) {
                if (pipe->nrbufs) {
                        struct pipe_buffer *buf = pipe->bufs + pipe->curbuf;
                        const struct pipe_buf_operations *ops = buf->ops;
-                        sd.len = buf->len;
+                        sd->len = buf->len;
-                        if (sd.len > sd.total_len)
+                        if (sd->len > sd->total_len)
-                                sd.len = sd.total_len;
+                                sd->len = sd->total_len;
-                        err = actor(pipe, buf, &sd);
+                        err = actor(pipe, buf, sd);
                        if (err <= 0) {
                                if (!ret && err != -ENODATA)
                                        ret = err;
@@ -704,10 +702,10 @@ ssize_t __splice_from_pipe(struct pipe_inode_info *pipe,
                        buf->offset += err;
                        buf->len -= err;
-                        sd.len -= err;
+                        sd->len -= err;
-                        sd.pos += err;
+                        sd->pos += err;
-                        sd.total_len -= err;
+                        sd->total_len -= err;
-                        if (sd.len)
+                        if (sd->len)
                                continue;
                        if (!buf->len) {
@@ -719,7 +717,7 @@ ssize_t __splice_from_pipe(struct pipe_inode_info *pipe,
                                        do_wakeup = 1;
                        }
-                        if (!sd.total_len)
+                        if (!sd->total_len)
                                break;
                }
@@ -732,7 +730,7 @@ ssize_t __splice_from_pipe(struct pipe_inode_info *pipe,
                                break;
                }
-                if (flags & SPLICE_F_NONBLOCK) {
+                if (sd->flags & SPLICE_F_NONBLOCK) {
                        if (!ret)
                                ret = -EAGAIN;
                        break;
@@ -766,12 +764,32 @@ ssize_t __splice_from_pipe(struct pipe_inode_info *pipe,
 }
 EXPORT_SYMBOL(__splice_from_pipe);
+/**
+ * splice_from_pipe - splice data from a pipe to a file
+ * @pipe:       pipe to splice from
+ * @out:        file to splice to
+ * @ppos:       position in @out
+ * @len:        how many bytes to splice
+ * @flags:      splice modifier flags
+ * @actor:      handler that splices the data
+ *
+ * Description:
+ *    See __splice_from_pipe. This function locks the input and output inodes,
+ *    otherwise it's identical to __splice_from_pipe().
+ *
+ */
 ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
                         loff_t *ppos, size_t len, unsigned int flags,
                         splice_actor *actor)
 {
        ssize_t ret;
        struct inode *inode = out->f_mapping->host;
+        struct splice_desc sd = {
+                .total_len = len,
+                .flags = flags,
+                .pos = *ppos,
+                .u.file = out,
+        };
        /*
         * The actor worker might be calling ->prepare_write and
@@ -780,7 +798,7 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
         * pipe->inode, we have to order lock acquiry here.
         */
        inode_double_lock(inode, pipe->inode);
-        ret = __splice_from_pipe(pipe, out, ppos, len, flags, actor);
+        ret = __splice_from_pipe(pipe, &sd, actor);
        inode_double_unlock(inode, pipe->inode);
        return ret;
@@ -790,12 +808,14 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
 * generic_file_splice_write_nolock - generic_file_splice_write without mutexes
 * @pipe:       pipe info
 * @out:        file to write to
+ * @ppos:       position in @out
 * @len:        number of bytes to splice
 * @flags:      splice modifier flags
 *
- * Will either move or copy pages (determined by @flags options) from
+ * Description:
- * the given pipe inode to the given file. The caller is responsible
+ *    Will either move or copy pages (determined by @flags options) from
- * for acquiring i_mutex on both inodes.
+ *    the given pipe inode to the given file. The caller is responsible
+ *    for acquiring i_mutex on both inodes.
 *
 */
 ssize_t
@@ -804,6 +824,12 @@ generic_file_splice_write_nolock(struct pipe_inode_info *pipe, struct file *out,
 {
        struct address_space *mapping = out->f_mapping;
        struct inode *inode = mapping->host;
+        struct splice_desc sd = {
+                .total_len = len,
+                .flags = flags,
+                .pos = *ppos,
+                .u.file = out,
+        };
        ssize_t ret;
        int err;
@@ -811,7 +837,7 @@ generic_file_splice_write_nolock(struct pipe_inode_info *pipe, struct file *out,
        if (unlikely(err))
                return err;
-        ret = __splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_file);
+        ret = __splice_from_pipe(pipe, &sd, pipe_to_file);
        if (ret > 0) {
                unsigned long nr_pages;
@@ -841,11 +867,13 @@ EXPORT_SYMBOL(generic_file_splice_write_nolock);
 * generic_file_splice_write - splice data from a pipe to a file
 * @pipe:       pipe info
 * @out:        file to write to
+ * @ppos:       position in @out
 * @len:        number of bytes to splice
 * @flags:      splice modifier flags
 *
- * Will either move or copy pages (determined by @flags options) from
+ * Description:
- * the given pipe inode to the given file.
+ *    Will either move or copy pages (determined by @flags options) from
+ *    the given pipe inode to the given file.
 *
 */
 ssize_t
@@ -896,13 +924,15 @@ EXPORT_SYMBOL(generic_file_splice_write);
 /**
 * generic_splice_sendpage - splice data from a pipe to a socket
- * @inode:      pipe inode
+ * @pipe:       pipe to splice from
 * @out:        socket to write to
+ * @ppos:       position in @out
 * @len:        number of bytes to splice
 * @flags:      splice modifier flags
 *
- * Will send @len bytes from the pipe to a network socket. No data copying
+ * Description:
- * is involved.
+ *    Will send @len bytes from the pipe to a network socket. No data copying
+ *    is involved.
 *
 */
 ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out,
@@ -956,14 +986,27 @@ static long do_splice_to(struct file *in, loff_t *ppos,
        return in->f_op->splice_read(in, ppos, pipe, len, flags);
 }
-long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
+/**
-                      size_t len, unsigned int flags)
+ * splice_direct_to_actor - splices data directly between two non-pipes
+ * @in:         file to splice from
+ * @sd:         actor information on where to splice to
+ * @actor:      handles the data splicing
+ *
+ * Description:
+ *    This is a special case helper to splice directly between two
+ *    points, without requiring an explicit pipe. Internally an allocated
+ *    pipe is cached in the process, and reused during the life time of
+ *    that process.
+ *
+ */
+ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
+                               splice_direct_actor *actor)
 {
        struct pipe_inode_info *pipe;
        long ret, bytes;
-        loff_t out_off;
        umode_t i_mode;
-        int i;
+        size_t len;
+        int i, flags;
        /*
         * We require the input being a regular file, as we don't want to
@@ -999,7 +1042,13 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
         */
        ret = 0;
        bytes = 0;
-        out_off = 0;
+        len = sd->total_len;
+        flags = sd->flags;
+        /*
+         * Don't block on output, we have to drain the direct pipe.
+         */
+        sd->flags &= ~SPLICE_F_NONBLOCK;
        while (len) {
                size_t read_len, max_read_len;
@@ -1009,19 +1058,19 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
                 */
                max_read_len = min(len, (size_t)(PIPE_BUFFERS*PAGE_SIZE));
-                ret = do_splice_to(in, ppos, pipe, max_read_len, flags);
+                ret = do_splice_to(in, &sd->pos, pipe, max_read_len, flags);
                if (unlikely(ret < 0))
                        goto out_release;
                read_len = ret;
+                sd->total_len = read_len;
                /*
                 * NOTE: nonblocking mode only applies to the input. We
                 * must not do the output in nonblocking mode as then we
                 * could get stuck data in the internal pipe:
                 */
-                ret = do_splice_from(pipe, out, &out_off, read_len,
+                ret = actor(pipe, sd);
-                                     flags & ~SPLICE_F_NONBLOCK);
                if (unlikely(ret < 0))
                        goto out_release;
@@ -1066,6 +1115,48 @@ out_release:
                return bytes;
        return ret;
+}
+EXPORT_SYMBOL(splice_direct_to_actor);
+/*
+ * Actor handed to splice_direct_to_actor() by do_splice_direct(): forwards
+ * the pipe contents to the output file stored in the descriptor, using the
+ * descriptor's position, remaining length and flags.
+ */
+static int direct_splice_actor(struct pipe_inode_info *pipe,
+                               struct splice_desc *sd)
+{
+        return do_splice_from(pipe, sd->u.file, &sd->pos, sd->total_len,
+                              sd->flags);
+}
+/**
+ * do_splice_direct - splices data directly between two files
+ * @in:         file to splice from
+ * @ppos:       input file offset
+ * @out:        file to splice to
+ * @len:        number of bytes to splice
+ * @flags:      splice modifier flags
+ *
+ * Description:
+ *    For use by do_sendfile(). splice can easily emulate sendfile, but
+ *    doing it in the application would incur an extra system call
+ *    (splice in + splice out, as compared to just sendfile()). So this helper
+ *    can splice directly through a process-private pipe.
+ *
+ *    The value returned by splice_direct_to_actor() is passed back to the
+ *    caller unchanged, and @ppos is updated from the descriptor's position
+ *    once the transfer has been attempted.
+ *
+ */
+long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
+                      size_t len, unsigned int flags)
+{
+        struct splice_desc sd = {
+                .len            = len,
+                .total_len      = len,
+                .flags          = flags,
+                .pos            = *ppos,
+                .u.file         = out,
+        };
+        /* Signed: the actor path may hand back a negative error code. */
+        long ret;
+        ret = splice_direct_to_actor(in, &sd, direct_splice_actor);
+        *ppos = sd.pos;
+        return ret;
 }
 /*
@@ -1248,28 +1339,131 @@ static int get_iovec_page_array(const struct iovec __user *iov,
        return error;
 }
+/*
+ * Actor for __splice_from_pipe(): copy sd->len bytes of pipe buffer @buf,
+ * starting at buf->offset, to the user address held in sd->u.userptr.
+ *
+ * Returns sd->len on success (and advances sd->u.userptr by that amount),
+ * -EFAULT if the copy to user space fails, or the non-zero value returned
+ * by the buffer's ->confirm() hook.
+ */
+static int pipe_to_user(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
+                        struct splice_desc *sd)
+{
+        char *src;
+        int ret;
+        ret = buf->ops->confirm(pipe, buf);
+        if (unlikely(ret))
+                return ret;
+        /*
+         * See if we can use the atomic maps, by prefaulting in the
+         * pages and doing an atomic copy
+         */
+        if (!fault_in_pages_writeable(sd->u.userptr, sd->len)) {
+                src = buf->ops->map(pipe, buf, 1);
+                ret = __copy_to_user_inatomic(sd->u.userptr, src + buf->offset,
+                                                        sd->len);
+                buf->ops->unmap(pipe, buf, src);
+                if (!ret) {
+                        ret = sd->len;
+                        goto out;
+                }
+        }
+        /*
+         * No dice, use slow non-atomic map and copy
+         */
+        src = buf->ops->map(pipe, buf, 0);
+        ret = sd->len;
+        if (copy_to_user(sd->u.userptr, src + buf->offset, sd->len))
+                ret = -EFAULT;
+        /*
+         * Drop the mapping before the out label: the only jump to it comes
+         * from the atomic path above, which has already unmapped the buffer.
+         * Unmapping after the label would release the mapping twice.
+         */
+        buf->ops->unmap(pipe, buf, src);
+out:
+        if (ret > 0)
+                sd->u.userptr += ret;
+        return ret;
+}
+/*
+ * For lack of a better implementation, implement vmsplice() to userspace
+ * as a simple copy of the pipe's pages to the user iov.
+ *
+ * Walks up to @nr_segs entries of @iov and, for each, drains pipe data into
+ * the user range through __splice_from_pipe() with the pipe_to_user actor.
+ * The walk stops at the first zero-length segment, on any error, or when a
+ * segment was only partially filled. The pipe inode mutex, if there is a
+ * pipe inode, is held across the whole walk.
+ *
+ * Returns the total number of bytes copied; if nothing was copied, returns
+ * the first error encountered (or 0), and -EBADF if @file is not a pipe.
+ */
+static long vmsplice_to_user(struct file *file, const struct iovec __user *iov,
+                             unsigned long nr_segs, unsigned int flags)
+{
+        struct pipe_inode_info *pipe;
+        struct splice_desc sd;
+        ssize_t size;
+        int error;
+        long ret;
+        pipe = pipe_info(file->f_path.dentry->d_inode);
+        if (!pipe)
+                return -EBADF;
+        if (pipe->inode)
+                mutex_lock(&pipe->inode->i_mutex);
+        error = ret = 0;
+        while (nr_segs) {
+                void __user *base;
+                size_t len;
+                /*
+                 * Get user address base and length for this iovec.
+                 */
+                error = get_user(base, &iov->iov_base);
+                if (unlikely(error))
+                        break;
+                error = get_user(len, &iov->iov_len);
+                if (unlikely(error))
+                        break;
+                /*
+                 * Sanity check this iovec. 0 read succeeds.
+                 */
+                if (unlikely(!len))
+                        break;
+                if (unlikely(!base)) {
+                        error = -EFAULT;
+                        break;
+                }
+                /*
+                 * The destination range comes straight from user space and
+                 * pipe_to_user() copies with the unchecked
+                 * __copy_to_user_inatomic() helper, so the range must be
+                 * validated here before any data is written to it.
+                 */
+                if (unlikely(!access_ok(VERIFY_WRITE, base, len))) {
+                        error = -EFAULT;
+                        break;
+                }
+                sd.len = 0;
+                sd.total_len = len;
+                sd.flags = flags;
+                sd.u.userptr = base;
+                sd.pos = 0;
+                size = __splice_from_pipe(pipe, &sd, pipe_to_user);
+                if (size < 0) {
+                        if (!ret)
+                                ret = size;
+                        break;
+                }
+                ret += size;
+                /* Short copy: the pipe ran dry, stop with what we have. */
+                if (size < len)
+                        break;
+                nr_segs--;
+                iov++;
+        }
+        if (pipe->inode)
+                mutex_unlock(&pipe->inode->i_mutex);
+        if (!ret)
+                ret = error;
+        return ret;
+}
 /*
 * vmsplice splices a user address range into a pipe. It can be thought of
 * as splice-from-memory, where the regular splice is splice-from-file (or
 * to file). In both cases the output is a pipe, naturally.
- *
- * Note that vmsplice only supports splicing _from_ user memory to a pipe,
- * not the other way around. Splicing from user memory is a simple operation
- * that can be supported without any funky alignment restrictions or nasty
- * vm tricks. We simply map in the user memory and fill them into a pipe.
- * The reverse isn't quite as easy, though. There are two possible solutions
- * for that:
- *
- *      - memcpy() the data internally, at which point we might as well just
- *        do a regular read() on the buffer anyway.
- *      - Lots of nasty vm tricks, that are neither fast nor flexible (it
- *        has restriction limitations on both ends of the pipe).
- *
- * Alas, it isn't here.
- *
 */
-static long do_vmsplice(struct file *file, const struct iovec __user *iov,
+static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov,
-                        unsigned long nr_segs, unsigned int flags)
+                             unsigned long nr_segs, unsigned int flags)
 {
        struct pipe_inode_info *pipe;
        struct page *pages[PIPE_BUFFERS];
@@ -1284,10 +1478,6 @@ static long do_vmsplice(struct file *file, const struct iovec __user *iov,
        pipe = pipe_info(file->f_path.dentry->d_inode);
        if (!pipe)
                return -EBADF;
-        if (unlikely(nr_segs > UIO_MAXIOV))
-                return -EINVAL;
-        else if (unlikely(!nr_segs))
-                return 0;
        spd.nr_pages = get_iovec_page_array(iov, nr_segs, pages, partial,
                                            flags & SPLICE_F_GIFT);
@@ -1297,6 +1487,22 @@ static long do_vmsplice(struct file *file, const struct iovec __user *iov,
        return splice_to_pipe(pipe, &spd);
 }
+/*
+ * Note that vmsplice only really supports true splicing _from_ user memory
+ * to a pipe, not the other way around. Splicing from user memory is a simple
+ * operation that can be supported without any funky alignment restrictions
+ * or nasty vm tricks. We simply map in the user memory and fill them into
+ * a pipe. The reverse isn't quite as easy, though. There are two possible
+ * solutions for that:
+ *
+ *      - memcpy() the data internally, at which point we might as well just
+ *        do a regular read() on the buffer anyway.
+ *      - Lots of nasty vm tricks, that are neither fast nor flexible (it
+ *        has restriction limitations on both ends of the pipe).
+ *
+ * Currently we punt and implement it as a normal copy, see pipe_to_user().
+ *
+ */
 asmlinkage long sys_vmsplice(int fd, const struct iovec __user *iov,
                             unsigned long nr_segs, unsigned int flags)
 {
@@ -1304,11 +1510,18 @@ asmlinkage long sys_vmsplice(int fd, const struct iovec __user *iov,
        long error;
        int fput;
+        if (unlikely(nr_segs > UIO_MAXIOV))
+                return -EINVAL;
+        else if (unlikely(!nr_segs))
+                return 0;
        error = -EBADF;
        file = fget_light(fd, &fput);
        if (file) {
                if (file->f_mode & FMODE_WRITE)
-                        error = do_vmsplice(file, iov, nr_segs, flags);
+                        error = vmsplice_to_pipe(file, iov, nr_segs, flags);
+                else if (file->f_mode & FMODE_READ)
+                        error = vmsplice_to_user(file, iov, nr_segs, flags);
                fput_light(file, fput);
        }
diff --git a/fs/sysv/file.c b/fs/sysv/file.c
index 0732ddb9020b..589be21d884e 100644
--- a/fs/sysv/file.c
+++ b/fs/sysv/file.c
@@ -27,7 +27,7 @@ const struct file_operations sysv_file_operations = {
        .aio_write      = generic_file_aio_write,
        .mmap           = generic_file_mmap,
        .fsync          = sysv_sync_file,
-        .sendfile       = generic_file_sendfile,
+        .splice_read    = generic_file_splice_read,
 };
 const struct inode_operations sysv_file_inode_operations = {
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 51b5764685e7..df070bee8d4f 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -261,7 +261,7 @@ const struct file_operations udf_file_operations = {
        .aio_write              = udf_file_aio_write,
        .release                = udf_release_file,
        .fsync                  = udf_fsync_file,
-        .sendfile               = generic_file_sendfile,
+        .splice_read            = generic_file_splice_read,
 };
 const struct inode_operations udf_file_inode_operations = {
diff --git a/fs/ufs/file.c b/fs/ufs/file.c
index 1e096323bad4..6705d74c6d2d 100644
--- a/fs/ufs/file.c
+++ b/fs/ufs/file.c
@@ -60,5 +60,5 @@ const struct file_operations ufs_file_operations = {
        .mmap           = generic_file_mmap,
        .open           = generic_file_open,
        .fsync          = ufs_sync_file,
-        .sendfile       = generic_file_sendfile,
+        .splice_read    = generic_file_splice_read,
 };
diff --git a/fs/utimes.c b/fs/utimes.c
index 480f7c8c29da..b3c88952465f 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -106,9 +106,16 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags
                if (IS_IMMUTABLE(inode))
                        goto dput_and_out;
-                if (current->fsuid != inode->i_uid &&
+                if (current->fsuid != inode->i_uid) {
-                    (error = vfs_permission(&nd, MAY_WRITE)) != 0)
+                        if (f) {
-                        goto dput_and_out;
+                                if (!(f->f_mode & FMODE_WRITE))
+                                        goto dput_and_out;
+                        } else {
+                                error = vfs_permission(&nd, MAY_WRITE);
+                                if (error)
+                                        goto dput_and_out;
+                        }
+                }
        }
        mutex_lock(&inode->i_mutex);
        error = notify_change(dentry, &newattrs);
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index cb51dc961355..8c43cd2e237a 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -124,30 +124,6 @@ xfs_file_aio_write_invis(
 }
 STATIC ssize_t
-xfs_file_sendfile(
-        struct file             *filp,
-        loff_t                  *pos,
-        size_t                  count,
-        read_actor_t            actor,
-        void                    *target)
-{
-        return bhv_vop_sendfile(vn_from_inode(filp->f_path.dentry->d_inode),
-                                filp, pos, 0, count, actor, target, NULL);
-}
-STATIC ssize_t
-xfs_file_sendfile_invis(
-        struct file             *filp,
-        loff_t                  *pos,
-        size_t                  count,
-        read_actor_t            actor,
-        void                    *target)
-{
-        return bhv_vop_sendfile(vn_from_inode(filp->f_path.dentry->d_inode),
-                                filp, pos, IO_INVIS, count, actor, target, NULL);
-}
-STATIC ssize_t
 xfs_file_splice_read(
        struct file             *infilp,
        loff_t                  *ppos,
@@ -452,7 +428,6 @@ const struct file_operations xfs_file_operations = {
        .write          = do_sync_write,
        .aio_read       = xfs_file_aio_read,
        .aio_write      = xfs_file_aio_write,
-        .sendfile       = xfs_file_sendfile,
        .splice_read    = xfs_file_splice_read,
        .splice_write   = xfs_file_splice_write,
        .unlocked_ioctl = xfs_file_ioctl,
@@ -475,7 +450,6 @@ const struct file_operations xfs_invis_file_operations = {
        .write          = do_sync_write,
        .aio_read       = xfs_file_aio_read_invis,
        .aio_write      = xfs_file_aio_write_invis,
-        .sendfile       = xfs_file_sendfile_invis,
        .splice_read    = xfs_file_splice_read_invis,
        .splice_write   = xfs_file_splice_write_invis,
        .unlocked_ioctl = xfs_file_ioctl_invis,
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 715adad7dd4d..af24a457d3a3 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -101,7 +101,6 @@
 * Feature macros (disable/enable)
 */
 #undef  HAVE_REFCACHE   /* reference cache not needed for NFS in 2.6 */
-#define HAVE_SENDFILE   /* sendfile(2) exists in 2.6, but not in 2.4 */
 #define HAVE_SPLICE     /* a splice(2) exists in 2.6, but not in 2.4 */
 #ifdef CONFIG_SMP
 #define HAVE_PERCPU_SB  /* per cpu superblock counters are a 2.6 feature */
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index ed90403f0ee7..765ec16a6e39 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -287,50 +287,6 @@ xfs_read(
 }
 ssize_t
-xfs_sendfile(
-        bhv_desc_t              *bdp,
-        struct file             *filp,
-        loff_t                  *offset,
-        int                     ioflags,
-        size_t                  count,
-        read_actor_t            actor,
-        void                    *target,
-        cred_t                  *credp)
-{
-        xfs_inode_t             *ip = XFS_BHVTOI(bdp);
-        xfs_mount_t             *mp = ip->i_mount;
-        ssize_t                 ret;
-        XFS_STATS_INC(xs_read_calls);
-        if (XFS_FORCED_SHUTDOWN(mp))
-                return -EIO;
-        xfs_ilock(ip, XFS_IOLOCK_SHARED);
-        if (DM_EVENT_ENABLED(BHV_TO_VNODE(bdp)->v_vfsp, ip, DM_EVENT_READ) &&
-            (!(ioflags & IO_INVIS))) {
-                bhv_vrwlock_t locktype = VRWLOCK_READ;
-                int error;
-                error = XFS_SEND_DATA(mp, DM_EVENT_READ, BHV_TO_VNODE(bdp),
-                                      *offset, count,
-                                      FILP_DELAY_FLAG(filp), &locktype);
-                if (error) {
-                        xfs_iunlock(ip, XFS_IOLOCK_SHARED);
-                        return -error;
-                }
-        }
-        xfs_rw_enter_trace(XFS_SENDFILE_ENTER, &ip->i_iocore,
-                   (void *)(unsigned long)target, count, *offset, ioflags);
-        ret = generic_file_sendfile(filp, offset, count, actor, target);
-        if (ret > 0)
-                XFS_STATS_ADD(xs_read_bytes, ret);
-        xfs_iunlock(ip, XFS_IOLOCK_SHARED);
-        return ret;
-}
-ssize_t
 xfs_splice_read(
        bhv_desc_t              *bdp,
        struct file             *infilp,
diff --git a/fs/xfs/linux-2.6/xfs_lrw.h b/fs/xfs/linux-2.6/xfs_lrw.h
index 7ac51b1d2161..7c60a1eed88b 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.h
+++ b/fs/xfs/linux-2.6/xfs_lrw.h
@@ -90,9 +90,6 @@ extern ssize_t xfs_read(struct bhv_desc *, struct kiocb *,
 extern ssize_t xfs_write(struct bhv_desc *, struct kiocb *,
                                const struct iovec *, unsigned int,
                                loff_t *, int, struct cred *);
-extern ssize_t xfs_sendfile(struct bhv_desc *, struct file *,
-                                loff_t *, int, size_t, read_actor_t,
-                                void *, struct cred *);
 extern ssize_t xfs_splice_read(struct bhv_desc *, struct file *, loff_t *,
                                struct pipe_inode_info *, size_t, int, int,
                                struct cred *);
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index d1b2d01843d1..013048a92643 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -139,9 +139,6 @@ typedef ssize_t (*vop_read_t)(bhv_desc_t *, struct kiocb *,
 typedef ssize_t (*vop_write_t)(bhv_desc_t *, struct kiocb *,
                                const struct iovec *, unsigned int,
                                loff_t *, int, struct cred *);
-typedef ssize_t (*vop_sendfile_t)(bhv_desc_t *, struct file *,
-                                loff_t *, int, size_t, read_actor_t,
-                                void *, struct cred *);
 typedef ssize_t (*vop_splice_read_t)(bhv_desc_t *, struct file *, loff_t *,
                                struct pipe_inode_info *, size_t, int, int,
                                struct cred *);
@@ -206,7 +203,6 @@ typedef struct bhv_vnodeops {
        vop_close_t             vop_close;
        vop_read_t              vop_read;
        vop_write_t             vop_write;
-        vop_sendfile_t          vop_sendfile;
        vop_splice_read_t       vop_splice_read;
        vop_splice_write_t      vop_splice_write;
        vop_ioctl_t             vop_ioctl;
@@ -254,8 +250,6 @@ typedef struct bhv_vnodeops {
                VOP(vop_read, vp)(VNHEAD(vp),file,iov,segs,offset,ioflags,cr)
 #define bhv_vop_write(vp,file,iov,segs,offset,ioflags,cr)               \
                VOP(vop_write, vp)(VNHEAD(vp),file,iov,segs,offset,ioflags,cr)
-#define bhv_vop_sendfile(vp,f,off,ioflags,cnt,act,targ,cr)              \
-                VOP(vop_sendfile, vp)(VNHEAD(vp),f,off,ioflags,cnt,act,targ,cr)
 #define bhv_vop_splice_read(vp,f,o,pipe,cnt,fl,iofl,cr)                 \
                VOP(vop_splice_read, vp)(VNHEAD(vp),f,o,pipe,cnt,fl,iofl,cr)
 #define bhv_vop_splice_write(vp,f,o,pipe,cnt,fl,iofl,cr)                \
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index de17aed578f0..70bc82f65311 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -4680,9 +4680,6 @@ bhv_vnodeops_t xfs_vnodeops = {
        .vop_open               = xfs_open,
        .vop_close              = xfs_close,
        .vop_read               = xfs_read,
-#ifdef HAVE_SENDFILE
-        .vop_sendfile           = xfs_sendfile,
-#endif
 #ifdef HAVE_SPLICE
        .vop_splice_read        = xfs_splice_read,
        .vop_splice_write       = xfs_splice_write,
author	David Woodhouse <dwmw2@infradead.org>	2007-07-11 09:55:48 -0400
committer	David Woodhouse <dwmw2@infradead.org>	2007-07-11 09:55:48 -0400
commit	db1b39d8b860e3716620c225bc86e0ec41764e34 (patch)
tree	8739074db733ef767400ea92cfbfed9352ddb92d /fs
parent	a6bc432e296dfa1f05d4b586ca5ca3085a2d42d7 (diff)
parent	4eb6bf6bfb580afaf1e1a1d30cba17a078530cf4 (diff)