Diffstat (limited to 'fs/ocfs2')
 -rw-r--r--  fs/ocfs2/Kconfig          |  85
 -rw-r--r--  fs/ocfs2/alloc.c          |  33
 -rw-r--r--  fs/ocfs2/aops.c           |   7
 -rw-r--r--  fs/ocfs2/dcache.c         |  42
 -rw-r--r--  fs/ocfs2/dcache.h         |   9
 -rw-r--r--  fs/ocfs2/dlm/dlmmaster.c  |  12
 -rw-r--r--  fs/ocfs2/dlm/dlmthread.c  |   3
 -rw-r--r--  fs/ocfs2/dlm/dlmunlock.c  |   4
 -rw-r--r--  fs/ocfs2/dlmglue.c        |  15
 -rw-r--r--  fs/ocfs2/journal.h        |   6
 -rw-r--r--  fs/ocfs2/namei.c          |   3
 -rw-r--r--  fs/ocfs2/ocfs2.h          |   9
 -rw-r--r--  fs/ocfs2/ocfs2_fs.h       |   6
 -rw-r--r--  fs/ocfs2/quota_global.c   | 173
 -rw-r--r--  fs/ocfs2/super.c          |  11
 -rw-r--r--  fs/ocfs2/xattr.c          |  74
 16 files changed, 269 insertions(+), 223 deletions(-)
diff --git a/fs/ocfs2/Kconfig b/fs/ocfs2/Kconfig
new file mode 100644
index 000000000000..701b7a3a872e
--- /dev/null
+++ b/fs/ocfs2/Kconfig
@@ -0,0 +1,85 @@
+config OCFS2_FS
+	tristate "OCFS2 file system support"
+	depends on NET && SYSFS
+	select CONFIGFS_FS
+	select JBD2
+	select CRC32
+	select QUOTA
+	select QUOTA_TREE
+	help
+	  OCFS2 is a general purpose extent based shared disk cluster file
+	  system with many similarities to ext3. It supports 64 bit inode
+	  numbers, and has automatically extending metadata groups which may
+	  also make it attractive for non-clustered use.
+
+	  You'll want to install the ocfs2-tools package in order to at least
+	  get "mount.ocfs2".
+
+	  Project web page:    http://oss.oracle.com/projects/ocfs2
+	  Tools web page:      http://oss.oracle.com/projects/ocfs2-tools
+	  OCFS2 mailing lists: http://oss.oracle.com/projects/ocfs2/mailman/
+
+	  For more information on OCFS2, see the file
+	  <file:Documentation/filesystems/ocfs2.txt>.
+
+config OCFS2_FS_O2CB
+	tristate "O2CB Kernelspace Clustering"
+	depends on OCFS2_FS
+	default y
+	help
+	  OCFS2 includes a simple kernelspace clustering package, the OCFS2
+	  Cluster Base. It only requires a very small userspace component
+	  to configure it. This comes with the standard ocfs2-tools package.
+	  O2CB is limited to maintaining a cluster for OCFS2 file systems.
+	  It cannot manage any other cluster applications.
+
+	  It is always safe to say Y here, as the clustering method is
+	  run-time selectable.
+
+config OCFS2_FS_USERSPACE_CLUSTER
+	tristate "OCFS2 Userspace Clustering"
+	depends on OCFS2_FS && DLM
+	default y
+	help
+	  This option will allow OCFS2 to use userspace clustering services
+	  in conjunction with the DLM in fs/dlm. If you are using a
+	  userspace cluster manager, say Y here.
+
+	  It is safe to say Y, as the clustering method is run-time
+	  selectable.
+
+config OCFS2_FS_STATS
+	bool "OCFS2 statistics"
+	depends on OCFS2_FS
+	default y
+	help
+	  This option allows some fs statistics to be captured. Enabling
+	  this option may increase the memory consumption.
+
+config OCFS2_DEBUG_MASKLOG
+	bool "OCFS2 logging support"
+	depends on OCFS2_FS
+	default y
+	help
+	  The ocfs2 filesystem has an extensive logging system. The system
+	  allows selection of events to log via files in /sys/o2cb/logmask/.
+	  This option will enlarge your kernel, but it allows debugging of
+	  ocfs2 filesystem issues.
+
+config OCFS2_DEBUG_FS
+	bool "OCFS2 expensive checks"
+	depends on OCFS2_FS
+	default n
+	help
+	  This option will enable expensive consistency checks. Enable
+	  this option for debugging only as it is likely to decrease
+	  performance of the filesystem.
+
+config OCFS2_FS_POSIX_ACL
+	bool "OCFS2 POSIX Access Control Lists"
+	depends on OCFS2_FS
+	select FS_POSIX_ACL
+	default n
+	help
+	  Posix Access Control Lists (ACLs) support permissions for users and
+	  groups beyond the owner/group/world scheme.
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index d861096c9d81..19e3a96aa02c 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -176,7 +176,8 @@ static int ocfs2_dinode_insert_check(struct inode *inode,
 
 	BUG_ON(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL);
 	mlog_bug_on_msg(!ocfs2_sparse_alloc(osb) &&
-			(OCFS2_I(inode)->ip_clusters != rec->e_cpos),
+			(OCFS2_I(inode)->ip_clusters !=
+			 le32_to_cpu(rec->e_cpos)),
 			"Device %s, asking for sparse allocation: inode %llu, "
 			"cpos %u, clusters %u\n",
 			osb->dev_str,
@@ -4796,6 +4797,29 @@ out:
 	return ret;
 }
 
+static int ocfs2_replace_extent_rec(struct inode *inode,
+				    handle_t *handle,
+				    struct ocfs2_path *path,
+				    struct ocfs2_extent_list *el,
+				    int split_index,
+				    struct ocfs2_extent_rec *split_rec)
+{
+	int ret;
+
+	ret = ocfs2_path_bh_journal_access(handle, inode, path,
+					   path_num_items(path) - 1);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	el->l_recs[split_index] = *split_rec;
+
+	ocfs2_journal_dirty(handle, path_leaf_bh(path));
+out:
+	return ret;
+}
+
 /*
  * Mark part or all of the extent record at split_index in the leaf
  * pointed to by path as written. This removes the unwritten
@@ -4885,7 +4909,9 @@ static int __ocfs2_mark_extent_written(struct inode *inode,
 
 	if (ctxt.c_contig_type == CONTIG_NONE) {
 		if (ctxt.c_split_covers_rec)
-			el->l_recs[split_index] = *split_rec;
+			ret = ocfs2_replace_extent_rec(inode, handle,
+						       path, el,
+						       split_index, split_rec);
 		else
 			ret = ocfs2_split_and_insert(inode, handle, path, et,
 						     &last_eb_bh, split_index,
@@ -5390,6 +5416,9 @@ int ocfs2_remove_btree_range(struct inode *inode,
 		goto out;
 	}
 
+	vfs_dq_free_space_nodirty(inode,
+				  ocfs2_clusters_to_bytes(inode->i_sb, len));
+
 	ret = ocfs2_remove_extent(inode, et, cpos, len, handle, meta_ac,
 				  dealloc);
 	if (ret) {
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index a067a6cffb01..8e1709a679b7 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -227,7 +227,7 @@ int ocfs2_read_inline_data(struct inode *inode, struct page *page,
 	size = i_size_read(inode);
 
 	if (size > PAGE_CACHE_SIZE ||
-	    size > ocfs2_max_inline_data(inode->i_sb)) {
+	    size > ocfs2_max_inline_data_with_xattr(inode->i_sb, di)) {
 		ocfs2_error(inode->i_sb,
 			    "Inode %llu has with inline data has bad size: %Lu",
 			    (unsigned long long)OCFS2_I(inode)->ip_blkno,
@@ -1555,6 +1555,7 @@ static int ocfs2_try_to_write_inline_data(struct address_space *mapping,
 	int ret, written = 0;
 	loff_t end = pos + len;
 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
+	struct ocfs2_dinode *di = NULL;
 
 	mlog(0, "Inode %llu, write of %u bytes at off %llu. features: 0x%x\n",
 	     (unsigned long long)oi->ip_blkno, len, (unsigned long long)pos,
@@ -1587,7 +1588,9 @@ static int ocfs2_try_to_write_inline_data(struct address_space *mapping,
 	/*
 	 * Check whether the write can fit.
 	 */
-	if (mmap_page || end > ocfs2_max_inline_data(inode->i_sb))
+	di = (struct ocfs2_dinode *)wc->w_di_bh->b_data;
+	if (mmap_page ||
+	    end > ocfs2_max_inline_data_with_xattr(inode->i_sb, di))
 		return 0;
 
 do_inline_write:
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index b1cc7c381e88..e9d7c2038c0f 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -38,6 +38,7 @@
 #include "dlmglue.h"
 #include "file.h"
 #include "inode.h"
+#include "super.h"
 
 
 static int ocfs2_dentry_revalidate(struct dentry *dentry,
@@ -294,6 +295,34 @@ out_attach:
 	return ret;
 }
 
+static DEFINE_SPINLOCK(dentry_list_lock);
+
+/* We limit the number of dentry locks to drop in one go. We have
+ * this limit so that we don't starve other users of ocfs2_wq. */
+#define DL_INODE_DROP_COUNT 64
+
+/* Drop inode references from dentry locks */
+void ocfs2_drop_dl_inodes(struct work_struct *work)
+{
+	struct ocfs2_super *osb = container_of(work, struct ocfs2_super,
+					       dentry_lock_work);
+	struct ocfs2_dentry_lock *dl;
+	int drop_count = DL_INODE_DROP_COUNT;
+
+	spin_lock(&dentry_list_lock);
+	while (osb->dentry_lock_list && drop_count--) {
+		dl = osb->dentry_lock_list;
+		osb->dentry_lock_list = dl->dl_next;
+		spin_unlock(&dentry_list_lock);
+		iput(dl->dl_inode);
+		kfree(dl);
+		spin_lock(&dentry_list_lock);
+	}
+	if (osb->dentry_lock_list)
+		queue_work(ocfs2_wq, &osb->dentry_lock_work);
+	spin_unlock(&dentry_list_lock);
+}
+
 /*
  * ocfs2_dentry_iput() and friends.
  *
@@ -318,16 +347,23 @@ out_attach:
 static void ocfs2_drop_dentry_lock(struct ocfs2_super *osb,
 				   struct ocfs2_dentry_lock *dl)
 {
-	iput(dl->dl_inode);
 	ocfs2_simple_drop_lockres(osb, &dl->dl_lockres);
 	ocfs2_lock_res_free(&dl->dl_lockres);
-	kfree(dl);
+
+	/* We leave dropping of inode reference to ocfs2_wq as that can
+	 * possibly lead to inode deletion which gets tricky */
+	spin_lock(&dentry_list_lock);
+	if (!osb->dentry_lock_list)
+		queue_work(ocfs2_wq, &osb->dentry_lock_work);
+	dl->dl_next = osb->dentry_lock_list;
+	osb->dentry_lock_list = dl;
+	spin_unlock(&dentry_list_lock);
 }
 
 void ocfs2_dentry_lock_put(struct ocfs2_super *osb,
 			   struct ocfs2_dentry_lock *dl)
 {
-	int unlock = 0;
+	int unlock;
 
 	BUG_ON(dl->dl_count == 0);
 
diff --git a/fs/ocfs2/dcache.h b/fs/ocfs2/dcache.h
index c091c34d9883..d06e16c06640 100644
--- a/fs/ocfs2/dcache.h
+++ b/fs/ocfs2/dcache.h
@@ -29,8 +29,13 @@
 extern struct dentry_operations ocfs2_dentry_ops;
 
 struct ocfs2_dentry_lock {
+	/* Use count of dentry lock */
 	unsigned int		dl_count;
-	u64			dl_parent_blkno;
+	union {
+		/* Linked list of dentry locks to release */
+		struct ocfs2_dentry_lock *dl_next;
+		u64		dl_parent_blkno;
+	};
 
 	/*
 	 * The ocfs2_dentry_lock keeps an inode reference until
@@ -47,6 +52,8 @@ int ocfs2_dentry_attach_lock(struct dentry *dentry, struct inode *inode,
 void ocfs2_dentry_lock_put(struct ocfs2_super *osb,
 			   struct ocfs2_dentry_lock *dl);
 
+void ocfs2_drop_dl_inodes(struct work_struct *work);
+
 struct dentry *ocfs2_find_local_alias(struct inode *inode, u64 parent_blkno,
 				      int skip_unhashed);
 
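The dcache.c and dcache.h hunks above move the final iput() of a dentry lock's inode off the lock-teardown path and onto ocfs2_wq, draining a singly linked list in bounded batches so the workqueue is not monopolized. As a quick illustration of that deferred-drop pattern in isolation, here is a minimal, hedged sketch; the names (drop_list, drop_work, example_wq) and the batch size are illustrative assumptions, not ocfs2 identifiers, and drop_work is assumed to have been set up elsewhere with INIT_WORK(&drop_work, drop_worker).

	#include <linux/spinlock.h>
	#include <linux/workqueue.h>
	#include <linux/fs.h>
	#include <linux/slab.h>

	struct deferred_drop {
		struct inode *inode;
		struct deferred_drop *next;
	};

	static DEFINE_SPINLOCK(drop_list_lock);
	static struct deferred_drop *drop_list;
	static struct work_struct drop_work;		/* INIT_WORK'd elsewhere */
	static struct workqueue_struct *example_wq;	/* assumed to exist */

	/* Runs in workqueue context, where the final iput() is safe. */
	static void drop_worker(struct work_struct *work)
	{
		struct deferred_drop *d;
		int budget = 64;	/* bounded batch, so other work is not starved */

		spin_lock(&drop_list_lock);
		while (drop_list && budget--) {
			d = drop_list;
			drop_list = d->next;
			spin_unlock(&drop_list_lock);
			iput(d->inode);	/* may trigger final inode deletion */
			kfree(d);
			spin_lock(&drop_list_lock);
		}
		if (drop_list)		/* more left: requeue ourselves */
			queue_work(example_wq, &drop_work);
		spin_unlock(&drop_list_lock);
	}

The spinlock is dropped around iput() because the final reference drop can enter inode deletion, which must not run under the list lock; that is the same reason the patch defers the drop out of ocfs2_drop_dentry_lock() in the first place.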
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 54e182a27caf..0a2813947853 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -1849,12 +1849,12 @@ int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data,
 	if (!mle) {
 		if (res->owner != DLM_LOCK_RES_OWNER_UNKNOWN &&
 		    res->owner != assert->node_idx) {
-			mlog(ML_ERROR, "assert_master from "
-			     "%u, but current owner is "
-			     "%u! (%.*s)\n",
-			     assert->node_idx, res->owner,
-			     namelen, name);
-			goto kill;
+			mlog(ML_ERROR, "DIE! Mastery assert from %u, "
+			     "but current owner is %u! (%.*s)\n",
+			     assert->node_idx, res->owner, namelen,
+			     name);
+			__dlm_print_one_lock_resource(res);
+			BUG();
 		}
 	} else if (mle->type != DLM_MLE_MIGRATION) {
 		if (res->owner != DLM_LOCK_RES_OWNER_UNKNOWN) {
diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
index d1295203029f..4060bb328bc8 100644
--- a/fs/ocfs2/dlm/dlmthread.c
+++ b/fs/ocfs2/dlm/dlmthread.c
@@ -181,8 +181,7 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm,
 
 	spin_lock(&res->spinlock);
 	/* This ensures that clear refmap is sent after the set */
-	__dlm_wait_on_lockres_flags(res, (DLM_LOCK_RES_SETREF_INPROG |
-					  DLM_LOCK_RES_MIGRATING));
+	__dlm_wait_on_lockres_flags(res, DLM_LOCK_RES_SETREF_INPROG);
 	spin_unlock(&res->spinlock);
 
 	/* clear our bit from the master's refmap, ignore errors */
diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c
index 86ca085ef324..fcf879ed6930 100644
--- a/fs/ocfs2/dlm/dlmunlock.c
+++ b/fs/ocfs2/dlm/dlmunlock.c
@@ -117,11 +117,11 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm,
 	else
 		BUG_ON(res->owner == dlm->node_num);
 
-	spin_lock(&dlm->spinlock);
+	spin_lock(&dlm->ast_lock);
 	/* We want to be sure that we're not freeing a lock
 	 * that still has AST's pending... */
 	in_use = !list_empty(&lock->ast_list);
-	spin_unlock(&dlm->spinlock);
+	spin_unlock(&dlm->ast_lock);
 	if (in_use) {
 		mlog(ML_ERROR, "lockres %.*s: Someone is calling dlmunlock "
 		     "while waiting for an ast!", res->lockname.len,
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index b0c4cadd4c45..7219a86d34cc 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -320,9 +320,14 @@ static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb,
 					struct ocfs2_lock_res *lockres);
 static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres,
 						int convert);
 #define ocfs2_log_dlm_error(_func, _err, _lockres) do {	\
-	mlog(ML_ERROR, "DLM error %d while calling %s on resource %s\n", \
-	     _err, _func, _lockres->l_name);	\
+	if ((_lockres)->l_type != OCFS2_LOCK_TYPE_DENTRY)	\
+		mlog(ML_ERROR, "DLM error %d while calling %s on resource %s\n", \
+		     _err, _func, _lockres->l_name);	\
+	else	\
+		mlog(ML_ERROR, "DLM error %d while calling %s on resource %.*s%08x\n", \
+		     _err, _func, OCFS2_DENTRY_LOCK_INO_START - 1, (_lockres)->l_name, \
+		     (unsigned int)ocfs2_get_dentry_lock_ino(_lockres));	\
 } while (0)
 static int ocfs2_downconvert_thread(void *arg);
 static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb,
@@ -2860,6 +2865,10 @@ static void ocfs2_unlock_ast(void *opaque, int error)
 	case OCFS2_UNLOCK_CANCEL_CONVERT:
 		mlog(0, "Cancel convert success for %s\n", lockres->l_name);
 		lockres->l_action = OCFS2_AST_INVALID;
+		/* Downconvert thread may have requeued this lock, we
+		 * need to wake it. */
+		if (lockres->l_flags & OCFS2_LOCK_BLOCKED)
+			ocfs2_wake_downconvert_thread(ocfs2_get_lockres_osb(lockres));
 		break;
 	case OCFS2_UNLOCK_DROP_LOCK:
 		lockres->l_level = DLM_LOCK_IV;
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 3c3532e1307c..172850a9a12a 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -513,8 +513,10 @@ static inline int ocfs2_jbd2_file_inode(handle_t *handle, struct inode *inode)
 static inline int ocfs2_begin_ordered_truncate(struct inode *inode,
 					       loff_t new_size)
 {
-	return jbd2_journal_begin_ordered_truncate(&OCFS2_I(inode)->ip_jinode,
-						   new_size);
+	return jbd2_journal_begin_ordered_truncate(
+				OCFS2_SB(inode->i_sb)->journal->j_journal,
+				&OCFS2_I(inode)->ip_jinode,
+				new_size);
 }
 
 #endif /* OCFS2_JOURNAL_H */
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 084aba86c3b2..4b11762f249e 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -532,7 +532,8 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
 
 		fe->i_dyn_features = cpu_to_le16(feat | OCFS2_INLINE_DATA_FL);
 
-		fe->id2.i_data.id_count = cpu_to_le16(ocfs2_max_inline_data(osb->sb));
+		fe->id2.i_data.id_count = cpu_to_le16(
+				ocfs2_max_inline_data_with_xattr(osb->sb, fe));
 	} else {
 		fel = &fe->id2.i_list;
 		fel->l_tree_depth = 0;
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index ad5c24a29edd..946d3c34b90b 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -210,6 +210,7 @@ struct ocfs2_journal;
 struct ocfs2_slot_info;
 struct ocfs2_recovery_map;
 struct ocfs2_quota_recovery;
+struct ocfs2_dentry_lock;
 struct ocfs2_super
 {
 	struct task_struct *commit_task;
@@ -325,6 +326,11 @@ struct ocfs2_super
 	struct list_head blocked_lock_list;
 	unsigned long blocked_lock_count;
 
+	/* List of dentry locks to release. Anyone can add locks to
+	 * the list, ocfs2_wq processes the list */
+	struct ocfs2_dentry_lock *dentry_lock_list;
+	struct work_struct dentry_lock_work;
+
 	wait_queue_head_t osb_mount_event;
 
 	/* Truncate log info */
@@ -335,6 +341,9 @@ struct ocfs2_super
 	struct ocfs2_node_map osb_recovering_orphan_dirs;
 	unsigned int *osb_orphan_wipes;
 	wait_queue_head_t osb_wipe_event;
+
+	/* used to protect metaecc calculation check of xattr. */
+	spinlock_t osb_xattr_lock;
 };
 
 #define OCFS2_SB(sb)	((struct ocfs2_super *)(sb)->s_fs_info)
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index c7ae45aaa36c..2332ef740f4f 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -1070,12 +1070,6 @@ static inline int ocfs2_fast_symlink_chars(struct super_block *sb)
 		offsetof(struct ocfs2_dinode, id2.i_symlink);
 }
 
-static inline int ocfs2_max_inline_data(struct super_block *sb)
-{
-	return sb->s_blocksize -
-		offsetof(struct ocfs2_dinode, id2.i_data.id_data);
-}
-
 static inline int ocfs2_max_inline_data_with_xattr(struct super_block *sb,
 						   struct ocfs2_dinode *di)
 {
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index 6aff8f2d3e49..1ed0f7c86869 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -754,7 +754,9 @@ static int ocfs2_mark_dquot_dirty(struct dquot *dquot)
 	if (dquot->dq_flags & mask)
 		sync = 1;
 	spin_unlock(&dq_data_lock);
-	if (!sync) {
+	/* This is a slight hack but we can't afford getting global quota
+	 * lock if we already have a transaction started. */
+	if (!sync || journal_current_handle()) {
 		status = ocfs2_write_dquot(dquot);
 		goto out;
 	}
@@ -810,171 +812,6 @@ out:
 	return status;
 }
 
-/* This is difficult. We have to lock quota inode and start transaction
- * in this function but we don't want to take the penalty of exlusive
- * quota file lock when we are just going to use cached structures. So
- * we just take read lock check whether we have dquot cached and if so,
- * we don't have to take the write lock... */
-static int ocfs2_dquot_initialize(struct inode *inode, int type)
-{
-	handle_t *handle = NULL;
-	int status = 0;
-	struct super_block *sb = inode->i_sb;
-	struct ocfs2_mem_dqinfo *oinfo;
-	int exclusive = 0;
-	int cnt;
-	qid_t id;
-
-	mlog_entry_void();
-
-	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-		if (type != -1 && cnt != type)
-			continue;
-		if (!sb_has_quota_active(sb, cnt))
-			continue;
-		oinfo = sb_dqinfo(sb, cnt)->dqi_priv;
-		status = ocfs2_lock_global_qf(oinfo, 0);
-		if (status < 0)
-			goto out;
-		/* This is just a performance optimization not a reliable test.
-		 * Since we hold an inode lock, noone can actually release
-		 * the structure until we are finished with initialization. */
-		if (inode->i_dquot[cnt] != NODQUOT) {
-			ocfs2_unlock_global_qf(oinfo, 0);
-			continue;
-		}
-		/* When we have inode lock, we know that no dquot_release() can
-		 * run and thus we can safely check whether we need to
-		 * read+modify global file to get quota information or whether
-		 * our node already has it. */
-		if (cnt == USRQUOTA)
-			id = inode->i_uid;
-		else if (cnt == GRPQUOTA)
-			id = inode->i_gid;
-		else
-			BUG();
-		/* Obtain exclusion from quota off... */
-		down_write(&sb_dqopt(sb)->dqptr_sem);
-		exclusive = !dquot_is_cached(sb, id, cnt);
-		up_write(&sb_dqopt(sb)->dqptr_sem);
-		if (exclusive) {
-			status = ocfs2_lock_global_qf(oinfo, 1);
-			if (status < 0) {
-				exclusive = 0;
-				mlog_errno(status);
-				goto out_ilock;
-			}
-			handle = ocfs2_start_trans(OCFS2_SB(sb),
-					ocfs2_calc_qinit_credits(sb, cnt));
-			if (IS_ERR(handle)) {
-				status = PTR_ERR(handle);
-				mlog_errno(status);
-				goto out_ilock;
-			}
-		}
-		dquot_initialize(inode, cnt);
-		if (exclusive) {
-			ocfs2_commit_trans(OCFS2_SB(sb), handle);
-			ocfs2_unlock_global_qf(oinfo, 1);
-		}
-		ocfs2_unlock_global_qf(oinfo, 0);
-	}
-	mlog_exit(0);
-	return 0;
-out_ilock:
-	if (exclusive)
-		ocfs2_unlock_global_qf(oinfo, 1);
-	ocfs2_unlock_global_qf(oinfo, 0);
-out:
-	mlog_exit(status);
-	return status;
-}
-
-static int ocfs2_dquot_drop_slow(struct inode *inode)
-{
-	int status = 0;
-	int cnt;
-	int got_lock[MAXQUOTAS] = {0, 0};
-	handle_t *handle;
-	struct super_block *sb = inode->i_sb;
-	struct ocfs2_mem_dqinfo *oinfo;
-
-	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-		if (!sb_has_quota_active(sb, cnt))
-			continue;
-		oinfo = sb_dqinfo(sb, cnt)->dqi_priv;
-		status = ocfs2_lock_global_qf(oinfo, 1);
-		if (status < 0)
-			goto out;
-		got_lock[cnt] = 1;
-	}
-	handle = ocfs2_start_trans(OCFS2_SB(sb),
-			ocfs2_calc_qinit_credits(sb, USRQUOTA) +
-			ocfs2_calc_qinit_credits(sb, GRPQUOTA));
-	if (IS_ERR(handle)) {
-		status = PTR_ERR(handle);
-		mlog_errno(status);
-		goto out;
-	}
-	dquot_drop(inode);
-	ocfs2_commit_trans(OCFS2_SB(sb), handle);
-out:
-	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
-		if (got_lock[cnt]) {
-			oinfo = sb_dqinfo(sb, cnt)->dqi_priv;
-			ocfs2_unlock_global_qf(oinfo, 1);
-		}
-	return status;
-}
-
-/* See the comment before ocfs2_dquot_initialize. */
-static int ocfs2_dquot_drop(struct inode *inode)
-{
-	int status = 0;
-	struct super_block *sb = inode->i_sb;
-	struct ocfs2_mem_dqinfo *oinfo;
-	int exclusive = 0;
-	int cnt;
-	int got_lock[MAXQUOTAS] = {0, 0};
-
-	mlog_entry_void();
-	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-		if (!sb_has_quota_active(sb, cnt))
-			continue;
-		oinfo = sb_dqinfo(sb, cnt)->dqi_priv;
-		status = ocfs2_lock_global_qf(oinfo, 0);
-		if (status < 0)
-			goto out;
-		got_lock[cnt] = 1;
-	}
-	/* Lock against anyone releasing references so that when when we check
-	 * we know we are not going to be last ones to release dquot */
-	down_write(&sb_dqopt(sb)->dqptr_sem);
-	/* Urgh, this is a terrible hack :( */
-	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-		if (inode->i_dquot[cnt] != NODQUOT &&
-		    atomic_read(&inode->i_dquot[cnt]->dq_count) > 1) {
-			exclusive = 1;
-			break;
-		}
-	}
-	if (!exclusive)
-		dquot_drop_locked(inode);
-	up_write(&sb_dqopt(sb)->dqptr_sem);
-out:
-	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
-		if (got_lock[cnt]) {
-			oinfo = sb_dqinfo(sb, cnt)->dqi_priv;
-			ocfs2_unlock_global_qf(oinfo, 0);
-		}
-	/* In case we bailed out because we had to do expensive locking
-	 * do it now... */
-	if (exclusive)
-		status = ocfs2_dquot_drop_slow(inode);
-	mlog_exit(status);
-	return status;
-}
-
 static struct dquot *ocfs2_alloc_dquot(struct super_block *sb, int type)
 {
 	struct ocfs2_dquot *dquot =
@@ -991,8 +828,8 @@ static void ocfs2_destroy_dquot(struct dquot *dquot)
 }
 
 struct dquot_operations ocfs2_quota_operations = {
-	.initialize	= ocfs2_dquot_initialize,
-	.drop		= ocfs2_dquot_drop,
+	.initialize	= dquot_initialize,
+	.drop		= dquot_drop,
 	.alloc_space	= dquot_alloc_space,
 	.alloc_inode	= dquot_alloc_inode,
 	.free_space	= dquot_free_space,
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 43ed11345b59..7ac83a81ee55 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1537,6 +1537,13 @@ static int ocfs2_get_sector(struct super_block *sb,
 	unlock_buffer(*bh);
 	ll_rw_block(READ, 1, bh);
 	wait_on_buffer(*bh);
+	if (!buffer_uptodate(*bh)) {
+		mlog_errno(-EIO);
+		brelse(*bh);
+		*bh = NULL;
+		return -EIO;
+	}
+
 	return 0;
 }
 
@@ -1747,6 +1754,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
 	INIT_LIST_HEAD(&osb->blocked_lock_list);
 	osb->blocked_lock_count = 0;
 	spin_lock_init(&osb->osb_lock);
+	spin_lock_init(&osb->osb_xattr_lock);
 	ocfs2_init_inode_steal_slot(osb);
 
 	atomic_set(&osb->alloc_stats.moves, 0);
@@ -1887,6 +1895,9 @@ static int ocfs2_initialize_super(struct super_block *sb,
 	INIT_WORK(&journal->j_recovery_work, ocfs2_complete_recovery);
 	journal->j_state = OCFS2_JOURNAL_FREE;
 
+	INIT_WORK(&osb->dentry_lock_work, ocfs2_drop_dl_inodes);
+	osb->dentry_lock_list = NULL;
+
 	/* get some pseudo constants for clustersize bits */
 	osb->s_clustersize_bits =
 		le32_to_cpu(di->id2.i_super.s_clustersize_bits);
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index e1d638af6ac3..2563df89fc2a 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -82,13 +82,14 @@ struct ocfs2_xattr_set_ctxt {
 
 #define OCFS2_XATTR_ROOT_SIZE	(sizeof(struct ocfs2_xattr_def_value_root))
 #define OCFS2_XATTR_INLINE_SIZE	80
+#define OCFS2_XATTR_HEADER_GAP	4
 #define OCFS2_XATTR_FREE_IN_IBODY	(OCFS2_MIN_XATTR_INLINE_SIZE \
 					 - sizeof(struct ocfs2_xattr_header) \
-					 - sizeof(__u32))
+					 - OCFS2_XATTR_HEADER_GAP)
 #define OCFS2_XATTR_FREE_IN_BLOCK(ptr)	((ptr)->i_sb->s_blocksize \
 					 - sizeof(struct ocfs2_xattr_block) \
 					 - sizeof(struct ocfs2_xattr_header) \
-					 - sizeof(__u32))
+					 - OCFS2_XATTR_HEADER_GAP)
 
 static struct ocfs2_xattr_def_value_root def_xv = {
 	.xv.xr_list.l_count = cpu_to_le16(1),
@@ -274,10 +275,12 @@ static int ocfs2_read_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
 			       bucket->bu_blocks, bucket->bu_bhs, 0,
 			       NULL);
 	if (!rc) {
+		spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
 		rc = ocfs2_validate_meta_ecc_bhs(bucket->bu_inode->i_sb,
 						 bucket->bu_bhs,
 						 bucket->bu_blocks,
 						 &bucket_xh(bucket)->xh_check);
+		spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
 		if (rc)
 			mlog_errno(rc);
 	}
@@ -310,9 +313,11 @@ static void ocfs2_xattr_bucket_journal_dirty(handle_t *handle,
 {
 	int i;
 
+	spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
 	ocfs2_compute_meta_ecc_bhs(bucket->bu_inode->i_sb,
 				   bucket->bu_bhs, bucket->bu_blocks,
 				   &bucket_xh(bucket)->xh_check);
+	spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
 
 	for (i = 0; i < bucket->bu_blocks; i++)
 		ocfs2_journal_dirty(handle, bucket->bu_bhs[i]);
@@ -542,8 +547,12 @@ int ocfs2_calc_xattr_init(struct inode *dir,
 	 * when blocksize = 512, may reserve one more cluser for
 	 * xattr bucket, otherwise reserve one metadata block
 	 * for them is ok.
+	 * If this is a new directory with inline data,
+	 * we choose to reserve the entire inline area for
+	 * directory contents and force an external xattr block.
 	 */
 	if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE ||
+	    (S_ISDIR(mode) && ocfs2_supports_inline_data(osb)) ||
 	    (s_size + a_size) > OCFS2_XATTR_FREE_IN_IBODY) {
 		ret = ocfs2_reserve_new_metadata_blocks(osb, 1, xattr_ac);
 		if (ret) {
@@ -1507,7 +1516,7 @@ static int ocfs2_xattr_set_entry(struct inode *inode,
 		last += 1;
 	}
 
-	free = min_offs - ((void *)last - xs->base) - sizeof(__u32);
+	free = min_offs - ((void *)last - xs->base) - OCFS2_XATTR_HEADER_GAP;
 	if (free < 0)
 		return -EIO;
 
@@ -2190,7 +2199,7 @@ static int ocfs2_xattr_can_be_in_inode(struct inode *inode,
 		last += 1;
 	}
 
-	free = min_offs - ((void *)last - xs->base) - sizeof(__u32);
+	free = min_offs - ((void *)last - xs->base) - OCFS2_XATTR_HEADER_GAP;
 	if (free < 0)
 		return 0;
 
@@ -2592,8 +2601,9 @@ static int __ocfs2_xattr_set_handle(struct inode *inode,
 
 	if (!ret) {
 		/* Update inode ctime. */
-		ret = ocfs2_journal_access(ctxt->handle, inode, xis->inode_bh,
-					   OCFS2_JOURNAL_ACCESS_WRITE);
+		ret = ocfs2_journal_access_di(ctxt->handle, inode,
+					      xis->inode_bh,
+					      OCFS2_JOURNAL_ACCESS_WRITE);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;
@@ -4729,13 +4739,6 @@ static int ocfs2_xattr_bucket_value_truncate(struct inode *inode,
 	vb.vb_xv = (struct ocfs2_xattr_value_root *)
 		(vb.vb_bh->b_data + offset % blocksize);
 
-	ret = ocfs2_xattr_bucket_journal_access(ctxt->handle, bucket,
-						OCFS2_JOURNAL_ACCESS_WRITE);
-	if (ret) {
-		mlog_errno(ret);
-		goto out;
-	}
-
 	/*
 	 * From here on out we have to dirty the bucket. The generic
 	 * value calls only modify one of the bucket's bhs, but we need
@@ -4748,12 +4751,18 @@ static int ocfs2_xattr_bucket_value_truncate(struct inode *inode,
 	ret = ocfs2_xattr_value_truncate(inode, &vb, len, ctxt);
 	if (ret) {
 		mlog_errno(ret);
-		goto out_dirty;
+		goto out;
+	}
+
+	ret = ocfs2_xattr_bucket_journal_access(ctxt->handle, bucket,
+						OCFS2_JOURNAL_ACCESS_WRITE);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
 	}
 
 	xe->xe_value_size = cpu_to_le64(len);
 
-out_dirty:
 	ocfs2_xattr_bucket_journal_dirty(ctxt->handle, bucket);
 
 out:
@@ -4786,19 +4795,33 @@ static int ocfs2_xattr_bucket_set_value_outside(struct inode *inode,
 						char *val,
 						int value_len)
 {
-	int offset;
+	int ret, offset, block_off;
 	struct ocfs2_xattr_value_root *xv;
 	struct ocfs2_xattr_entry *xe = xs->here;
+	struct ocfs2_xattr_header *xh = bucket_xh(xs->bucket);
+	void *base;
 
 	BUG_ON(!xs->base || !xe || ocfs2_xattr_is_local(xe));
 
-	offset = le16_to_cpu(xe->xe_name_offset) +
-		 OCFS2_XATTR_SIZE(xe->xe_name_len);
+	ret = ocfs2_xattr_bucket_get_name_value(inode, xh,
+						xe - xh->xh_entries,
+						&block_off,
+						&offset);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
 
-	xv = (struct ocfs2_xattr_value_root *)(xs->base + offset);
+	base = bucket_block(xs->bucket, block_off);
+	xv = (struct ocfs2_xattr_value_root *)(base + offset +
+		 OCFS2_XATTR_SIZE(xe->xe_name_len));
 
-	return __ocfs2_xattr_set_value_outside(inode, handle,
-					       xv, val, value_len);
+	ret = __ocfs2_xattr_set_value_outside(inode, handle,
+					      xv, val, value_len);
+	if (ret)
+		mlog_errno(ret);
+out:
+	return ret;
 }
 
 static int ocfs2_rm_xattr_cluster(struct inode *inode,
@@ -5061,8 +5084,8 @@ try_again:
 		xh_free_start = le16_to_cpu(xh->xh_free_start);
 		header_size = sizeof(struct ocfs2_xattr_header) +
 				count * sizeof(struct ocfs2_xattr_entry);
-		max_free = OCFS2_XATTR_BUCKET_SIZE -
-			le16_to_cpu(xh->xh_name_value_len) - header_size;
+		max_free = OCFS2_XATTR_BUCKET_SIZE - header_size -
+			le16_to_cpu(xh->xh_name_value_len) - OCFS2_XATTR_HEADER_GAP;
 
 		mlog_bug_on_msg(header_size > blocksize, "bucket %llu has header size "
 				"of %u which exceed block size\n",
@@ -5095,7 +5118,7 @@ try_again:
 			need = 0;
 		}
 
-		free = xh_free_start - header_size;
+		free = xh_free_start - header_size - OCFS2_XATTR_HEADER_GAP;
 		/*
 		 * We need to make sure the new name/value pair
 		 * can exist in the same block.
@@ -5128,7 +5151,8 @@ try_again:
 		}
 
 		xh_free_start = le16_to_cpu(xh->xh_free_start);
-		free = xh_free_start - header_size;
+		free = xh_free_start - header_size
+			- OCFS2_XATTR_HEADER_GAP;
 		if (xh_free_start % blocksize < need)
 			free -= xh_free_start % blocksize;
 
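The recurring OCFS2_XATTR_HEADER_GAP edits above all reserve a fixed 4-byte gap between the xattr entry header region and the packed name/value region whenever free space is computed, replacing the bare sizeof(__u32) used before. A rough worked example of that arithmetic, as a standalone sketch; the bucket size, header size and name/value length below are made-up numbers for illustration only, not ocfs2 on-disk constants.

	#include <stdio.h>

	#define XATTR_BUCKET_SIZE 4096	/* assumed bucket size for illustration */
	#define XATTR_HEADER_GAP  4	/* the gap kept between the two regions */

	int main(void)
	{
		unsigned int header_size = 64;		/* assumed header + entry array */
		unsigned int name_value_len = 900;	/* assumed bytes already in use */

		/* Free space for a new name/value pair, per the new formula:
		 * bucket size minus the header area, minus the name/value bytes
		 * already consumed, minus the 4-byte separating gap. */
		unsigned int free_bytes = XATTR_BUCKET_SIZE - header_size -
					  name_value_len - XATTR_HEADER_GAP;

		printf("free bytes for a new xattr: %u\n", free_bytes);	/* prints 3128 */
		return 0;
	}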