 fs/dax.c                          |   9
 fs/direct-io.c                    |   9
 fs/ext4/ext4.h                    |  30
 fs/ext4/inode.c                   |  24
 fs/ext4/page-io.c                 |  10
 fs/ocfs2/aops.c                   |   7
 fs/quota/quota.c                  |  66
 fs/xfs/libxfs/xfs_alloc_btree.c   |   2
 fs/xfs/libxfs/xfs_attr_sf.h       |  16
 fs/xfs/libxfs/xfs_bmap_btree.c    |   1
 fs/xfs/libxfs/xfs_btree.c         |  32
 fs/xfs/libxfs/xfs_da_format.h     |  16
 fs/xfs/libxfs/xfs_ialloc_btree.c  |  12
 fs/xfs/libxfs/xfs_inode_fork.c    |   1
 fs/xfs/libxfs/xfs_log_format.h    |   2
 fs/xfs/libxfs/xfs_quota_defs.h    |   3
 fs/xfs/libxfs/xfs_rtbitmap.c      |  30
 fs/xfs/libxfs/xfs_sb.h            |   1
 fs/xfs/libxfs/xfs_shared.h        |   1
 fs/xfs/xfs_aops.c                 | 274
 fs/xfs/xfs_bmap_util.c            |   4
 fs/xfs/xfs_dquot.c                | 129
 fs/xfs/xfs_file.c                 |  82
 fs/xfs/xfs_fsops.h                |   1
 fs/xfs/xfs_inode.h                |   2
 fs/xfs/xfs_log_recover.c          | 282
 fs/xfs/xfs_mount.h                |   1
 fs/xfs/xfs_qm.c                   |  55
 fs/xfs/xfs_qm.h                   |  48
 fs/xfs/xfs_qm_syscalls.c          |  27
 fs/xfs/xfs_quotaops.c             |  36
 fs/xfs/xfs_trace.h                |   9
 fs/xfs/xfs_trans_ail.c            |   5
 fs/xfs/xfs_trans_dquot.c          |  15
 include/linux/fs.h                |   2
 include/linux/quota.h             |   2
 include/uapi/linux/dqblk_xfs.h    |   1
 include/uapi/linux/quota.h        |  14
 38 files changed, 811 insertions(+), 450 deletions(-)
diff --git a/fs/dax.c b/fs/dax.c
index 4fd6b0c5c6b5..e38b2c589b54 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -267,8 +267,13 @@ ssize_t dax_do_io(struct kiocb *iocb, struct inode *inode,
     if ((flags & DIO_LOCKING) && iov_iter_rw(iter) == READ)
         inode_unlock(inode);
 
-    if ((retval > 0) && end_io)
-        end_io(iocb, pos, retval, bh.b_private);
+    if (end_io) {
+        int err;
+
+        err = end_io(iocb, pos, retval, bh.b_private);
+        if (err)
+            retval = err;
+    }
 
     if (!(flags & DIO_SKIP_DIO_COUNT))
         inode_dio_end(inode);
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 1b2f7ffc8b84..9c6f885cc518 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -253,8 +253,13 @@ static ssize_t dio_complete(struct dio *dio, loff_t offset, ssize_t ret,
     if (ret == 0)
         ret = transferred;
 
-    if (dio->end_io && dio->result)
-        dio->end_io(dio->iocb, offset, transferred, dio->private);
+    if (dio->end_io) {
+        int err;
+
+        err = dio->end_io(dio->iocb, offset, ret, dio->private);
+        if (err)
+            ret = err;
+    }
 
     if (!(dio->flags & DIO_SKIP_DIO_COUNT))
         inode_dio_end(dio->inode);
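
These two hunks define the new completion contract shared by dax_do_io() and __blockdev_direct_IO(): the ->end_io callback now returns an int, is invoked even when the I/O failed or wrote nothing, and a nonzero return overrides the final result. A minimal sketch of a callback written against that contract (hypothetical filesystem code, not part of this patch):

    static int example_end_io(struct kiocb *iocb, loff_t offset,
                              ssize_t size, void *private)
    {
        if (size <= 0)      /* failed or empty write: clean up only */
            return 0;       /* nothing to convert or extend */

        /* convert unwritten extents, update on-disk i_size, etc. */
        return 0;           /* nonzero here would replace the dio result */
    }
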
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 0662b285dc8a..56c12df107ab 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1504,15 +1504,6 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
 		 ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count));
 }
 
-static inline void ext4_set_io_unwritten_flag(struct inode *inode,
-                                              struct ext4_io_end *io_end)
-{
-    if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
-        io_end->flag |= EXT4_IO_END_UNWRITTEN;
-        atomic_inc(&EXT4_I(inode)->i_unwritten);
-    }
-}
-
 static inline ext4_io_end_t *ext4_inode_aio(struct inode *inode)
 {
     return inode->i_private;
@@ -3293,6 +3284,27 @@ extern struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ];
 extern int ext4_resize_begin(struct super_block *sb);
 extern void ext4_resize_end(struct super_block *sb);
 
+static inline void ext4_set_io_unwritten_flag(struct inode *inode,
+                                              struct ext4_io_end *io_end)
+{
+    if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
+        io_end->flag |= EXT4_IO_END_UNWRITTEN;
+        atomic_inc(&EXT4_I(inode)->i_unwritten);
+    }
+}
+
+static inline void ext4_clear_io_unwritten_flag(ext4_io_end_t *io_end)
+{
+    struct inode *inode = io_end->inode;
+
+    if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
+        io_end->flag &= ~EXT4_IO_END_UNWRITTEN;
+        /* Wake up anyone waiting on unwritten extent conversion */
+        if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten))
+            wake_up_all(ext4_ioend_wq(inode));
+    }
+}
+
 #endif	/* __KERNEL__ */
 
 #define EFSBADCRC	EBADMSG		/* Bad CRC detected */
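
Moving both helpers into ext4.h pairs the setter with its new inverse. The clear side wakes ext4_ioend_wq() on the final decrement because conversion waiters sleep on that queue until i_unwritten drains; a hedged sketch of the waiting side (modeled on ext4's unwritten-wait helper of this era, not part of this diff):

    static void example_unwritten_wait(struct inode *inode)
    {
        /* sleep until every EXT4_IO_END_UNWRITTEN io_end is cleared */
        wait_event(*ext4_ioend_wq(inode),
                   atomic_read(&EXT4_I(inode)->i_unwritten) == 0);
    }
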
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 83bc8bfb3bea..2b98171a9432 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3161,14 +3161,14 @@ out:
 }
 #endif
 
-static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
-                            ssize_t size, void *private)
+static int ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
+                           ssize_t size, void *private)
 {
     ext4_io_end_t *io_end = iocb->private;
 
     /* if not async direct IO just return */
     if (!io_end)
-        return;
+        return 0;
 
     ext_debug("ext4_end_io_dio(): io_end 0x%p "
               "for inode %lu, iocb 0x%p, offset %llu, size %zd\n",
@@ -3176,9 +3176,19 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
               size);
 
     iocb->private = NULL;
+    /*
+     * Error during AIO DIO. We cannot convert unwritten extents as the
+     * data was not written. Just clear the unwritten flag and drop io_end.
+     */
+    if (size <= 0) {
+        ext4_clear_io_unwritten_flag(io_end);
+        size = 0;
+    }
     io_end->offset = offset;
     io_end->size = size;
     ext4_put_io_end(io_end);
+
+    return 0;
 }
 
 /*
@@ -3301,16 +3311,6 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
     if (io_end) {
         ext4_inode_aio_set(inode, NULL);
         ext4_put_io_end(io_end);
-        /*
-         * When no IO was submitted ext4_end_io_dio() was not
-         * called so we have to put iocb's reference.
-         */
-        if (ret <= 0 && ret != -EIOCBQUEUED && iocb->private) {
-            WARN_ON(iocb->private != io_end);
-            WARN_ON(io_end->flag & EXT4_IO_END_UNWRITTEN);
-            ext4_put_io_end(io_end);
-            iocb->private = NULL;
-        }
     }
     if (ret > 0 && !overwrite && ext4_test_inode_state(inode,
             EXT4_STATE_DIO_UNWRITTEN)) {
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 090b3498638e..f49a87c4fb63 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -139,16 +139,6 @@ static void ext4_release_io_end(ext4_io_end_t *io_end)
     kmem_cache_free(io_end_cachep, io_end);
 }
 
-static void ext4_clear_io_unwritten_flag(ext4_io_end_t *io_end)
-{
-    struct inode *inode = io_end->inode;
-
-    io_end->flag &= ~EXT4_IO_END_UNWRITTEN;
-    /* Wake up anyone waiting on unwritten extent conversion */
-    if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten))
-        wake_up_all(ext4_ioend_wq(inode));
-}
-
 /*
  * Check a range of space and convert unwritten extents to written. Note that
  * we are protected from truncate touching same part of extent tree by the
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 794fd1587f34..5dcc5f5a842e 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -620,7 +620,7 @@ bail:
  * particularly interested in the aio/dio case. We use the rw_lock DLM lock
  * to protect io on one node from truncation on another.
  */
-static void ocfs2_dio_end_io(struct kiocb *iocb,
+static int ocfs2_dio_end_io(struct kiocb *iocb,
                              loff_t offset,
                              ssize_t bytes,
                              void *private)
@@ -628,6 +628,9 @@ static void ocfs2_dio_end_io(struct kiocb *iocb,
     struct inode *inode = file_inode(iocb->ki_filp);
     int level;
 
+    if (bytes <= 0)
+        return 0;
+
     /* this io's submitter should not have unlocked this before we could */
     BUG_ON(!ocfs2_iocb_is_rw_locked(iocb));
 
@@ -644,6 +647,8 @@ static void ocfs2_dio_end_io(struct kiocb *iocb,
         level = ocfs2_iocb_rw_locked_level(iocb);
         ocfs2_rw_unlock(inode, level);
     }
+
+    return 0;
 }
 
 static int ocfs2_releasepage(struct page *page, gfp_t wait)
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index 3746367098fd..0ebc90496525 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -79,7 +79,7 @@ unsigned int qtype_enforce_flag(int type)
     return 0;
 }
 
-static int quota_quotaon(struct super_block *sb, int type, int cmd, qid_t id,
+static int quota_quotaon(struct super_block *sb, int type, qid_t id,
                          struct path *path)
 {
     if (!sb->s_qcop->quota_on && !sb->s_qcop->quota_enable)
@@ -222,6 +222,34 @@ static int quota_getquota(struct super_block *sb, int type, qid_t id,
     return 0;
 }
 
+/*
+ * Return quota for next active quota >= this id, if any exists,
+ * otherwise return -ESRCH via ->get_nextdqblk
+ */
+static int quota_getnextquota(struct super_block *sb, int type, qid_t id,
+                              void __user *addr)
+{
+    struct kqid qid;
+    struct qc_dqblk fdq;
+    struct if_nextdqblk idq;
+    int ret;
+
+    if (!sb->s_qcop->get_nextdqblk)
+        return -ENOSYS;
+    qid = make_kqid(current_user_ns(), type, id);
+    if (!qid_valid(qid))
+        return -EINVAL;
+    ret = sb->s_qcop->get_nextdqblk(sb, &qid, &fdq);
+    if (ret)
+        return ret;
+    /* struct if_nextdqblk is a superset of struct if_dqblk */
+    copy_to_if_dqblk((struct if_dqblk *)&idq, &fdq);
+    idq.dqb_id = from_kqid(current_user_ns(), qid);
+    if (copy_to_user(addr, &idq, sizeof(idq)))
+        return -EFAULT;
+    return 0;
+}
+
 static void copy_from_if_dqblk(struct qc_dqblk *dst, struct if_dqblk *src)
 {
     dst->d_spc_hardlimit = qbtos(src->dqb_bhardlimit);
@@ -625,6 +653,34 @@ static int quota_getxquota(struct super_block *sb, int type, qid_t id,
     return ret;
 }
 
+/*
+ * Return quota for next active quota >= this id, if any exists,
+ * otherwise return -ESRCH via ->get_nextdqblk.
+ */
+static int quota_getnextxquota(struct super_block *sb, int type, qid_t id,
+                               void __user *addr)
+{
+    struct fs_disk_quota fdq;
+    struct qc_dqblk qdq;
+    struct kqid qid;
+    qid_t id_out;
+    int ret;
+
+    if (!sb->s_qcop->get_nextdqblk)
+        return -ENOSYS;
+    qid = make_kqid(current_user_ns(), type, id);
+    if (!qid_valid(qid))
+        return -EINVAL;
+    ret = sb->s_qcop->get_nextdqblk(sb, &qid, &qdq);
+    if (ret)
+        return ret;
+    id_out = from_kqid(current_user_ns(), qid);
+    copy_to_xfs_dqblk(&fdq, &qdq, type, id_out);
+    if (copy_to_user(addr, &fdq, sizeof(fdq)))
+        return -EFAULT;
+    return ret;
+}
+
 static int quota_rmxquota(struct super_block *sb, void __user *addr)
 {
     __u32 flags;
@@ -659,7 +715,7 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id,
 
     switch (cmd) {
     case Q_QUOTAON:
-        return quota_quotaon(sb, type, cmd, id, path);
+        return quota_quotaon(sb, type, id, path);
     case Q_QUOTAOFF:
         return quota_quotaoff(sb, type);
     case Q_GETFMT:
@@ -670,6 +726,8 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id,
         return quota_setinfo(sb, type, addr);
     case Q_GETQUOTA:
         return quota_getquota(sb, type, id, addr);
+    case Q_GETNEXTQUOTA:
+        return quota_getnextquota(sb, type, id, addr);
     case Q_SETQUOTA:
         return quota_setquota(sb, type, id, addr);
     case Q_SYNC:
@@ -690,6 +748,8 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id,
         return quota_setxquota(sb, type, id, addr);
     case Q_XGETQUOTA:
         return quota_getxquota(sb, type, id, addr);
+    case Q_XGETNEXTQUOTA:
+        return quota_getnextxquota(sb, type, id, addr);
     case Q_XQUOTASYNC:
         if (sb->s_flags & MS_RDONLY)
             return -EROFS;
@@ -708,10 +768,12 @@ static int quotactl_cmd_write(int cmd)
     switch (cmd) {
     case Q_GETFMT:
     case Q_GETINFO:
+    case Q_GETNEXTQUOTA:
     case Q_SYNC:
     case Q_XGETQSTAT:
     case Q_XGETQSTATV:
     case Q_XGETQUOTA:
+    case Q_XGETNEXTQUOTA:
     case Q_XQUOTASYNC:
         return 0;
     }
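
Both new commands are read-only, which is why they join the list in quotactl_cmd_write() above. A hedged userspace sketch of the intended iteration pattern (the device path is illustrative, and Q_GETNEXTQUOTA plus struct if_nextdqblk come from the uapi headers this series extends):

    #include <sys/quota.h>
    #include <errno.h>

    struct if_nextdqblk idq;
    qid_t id = 0;

    /* visit every active user quota on the (example) device */
    while (quotactl(QCMD(Q_GETNEXTQUOTA, USRQUOTA), "/dev/sda1",
                    id, (void *)&idq) == 0) {
        /* idq.dqb_id is the id actually found, >= the id requested */
        id = idq.dqb_id + 1;        /* resume just past it */
    }
    /* the walk terminates with errno == ESRCH: no higher active id */
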
diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c
index 444626ddbd1b..d9b42425291e 100644
--- a/fs/xfs/libxfs/xfs_alloc_btree.c
+++ b/fs/xfs/libxfs/xfs_alloc_btree.c
@@ -118,8 +118,6 @@ xfs_allocbt_free_block(
     xfs_extent_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1,
                            XFS_EXTENT_BUSY_SKIP_DISCARD);
     xfs_trans_agbtree_delta(cur->bc_tp, -1);
-
-    xfs_trans_binval(cur->bc_tp, bp);
     return 0;
 }
 
diff --git a/fs/xfs/libxfs/xfs_attr_sf.h b/fs/xfs/libxfs/xfs_attr_sf.h
index 919756e3ba53..90928bbe693c 100644
--- a/fs/xfs/libxfs/xfs_attr_sf.h
+++ b/fs/xfs/libxfs/xfs_attr_sf.h
@@ -24,22 +24,6 @@
  * Small attribute lists are packed as tightly as possible so as
  * to fit into the literal area of the inode.
  */
-
-/*
- * Entries are packed toward the top as tight as possible.
- */
-typedef struct xfs_attr_shortform {
-    struct xfs_attr_sf_hdr {    /* constant-structure header block */
-        __be16  totsize;        /* total bytes in shortform list */
-        __u8    count;          /* count of active entries */
-    } hdr;
-    struct xfs_attr_sf_entry {
-        __uint8_t namelen;      /* actual length of name (no NULL) */
-        __uint8_t valuelen;     /* actual length of value (no NULL) */
-        __uint8_t flags;        /* flags bits (see xfs_attr_leaf.h) */
-        __uint8_t nameval[1];   /* name & value bytes concatenated */
-    } list[1];                  /* variable sized array */
-} xfs_attr_shortform_t;
 typedef struct xfs_attr_sf_hdr xfs_attr_sf_hdr_t;
 typedef struct xfs_attr_sf_entry xfs_attr_sf_entry_t;
 
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
index 1637c37bfbaa..e37508ae589b 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.c
+++ b/fs/xfs/libxfs/xfs_bmap_btree.c
@@ -531,7 +531,6 @@ xfs_bmbt_free_block(
 
     xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
     xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
-    xfs_trans_binval(tp, bp);
     return 0;
 }
 
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index a0eb18ce3ad3..1f88e1ce770f 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -294,6 +294,21 @@ xfs_btree_sblock_verify_crc(
     return true;
 }
 
+static int
+xfs_btree_free_block(
+    struct xfs_btree_cur    *cur,
+    struct xfs_buf          *bp)
+{
+    int                     error;
+
+    error = cur->bc_ops->free_block(cur, bp);
+    if (!error) {
+        xfs_trans_binval(cur->bc_tp, bp);
+        XFS_BTREE_STATS_INC(cur, free);
+    }
+    return error;
+}
+
 /*
  * Delete the btree cursor.
  */
@@ -3209,6 +3224,7 @@ xfs_btree_kill_iroot(
     int                     level;
     int                     index;
     int                     numrecs;
+    int                     error;
 #ifdef DEBUG
     union xfs_btree_ptr     ptr;
     int                     i;
@@ -3272,8 +3288,6 @@ xfs_btree_kill_iroot(
     cpp = xfs_btree_ptr_addr(cur, 1, cblock);
 #ifdef DEBUG
     for (i = 0; i < numrecs; i++) {
-        int             error;
-
         error = xfs_btree_check_ptr(cur, cpp, i, level - 1);
         if (error) {
             XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
@@ -3283,8 +3297,11 @@ xfs_btree_kill_iroot(
 #endif
     xfs_btree_copy_ptrs(cur, pp, cpp, numrecs);
 
-    cur->bc_ops->free_block(cur, cbp);
-    XFS_BTREE_STATS_INC(cur, free);
+    error = xfs_btree_free_block(cur, cbp);
+    if (error) {
+        XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+        return error;
+    }
 
     cur->bc_bufs[level - 1] = NULL;
     be16_add_cpu(&block->bb_level, -1);
@@ -3317,14 +3334,12 @@ xfs_btree_kill_root(
      */
     cur->bc_ops->set_root(cur, newroot, -1);
 
-    error = cur->bc_ops->free_block(cur, bp);
+    error = xfs_btree_free_block(cur, bp);
     if (error) {
         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
         return error;
     }
 
-    XFS_BTREE_STATS_INC(cur, free);
-
     cur->bc_bufs[level] = NULL;
     cur->bc_ra[level] = 0;
     cur->bc_nlevels--;
@@ -3830,10 +3845,9 @@ xfs_btree_delrec(
     }
 
     /* Free the deleted block. */
-    error = cur->bc_ops->free_block(cur, rbp);
+    error = xfs_btree_free_block(cur, rbp);
     if (error)
         goto error0;
-    XFS_BTREE_STATS_INC(cur, free);
 
     /*
      * If we joined with the left neighbor, set the buffer in the
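
Beyond deduplicating the xfs_trans_binval()/stats tail, the helper closes an error-handling hole: xfs_btree_kill_iroot() previously called cur->bc_ops->free_block() as a bare statement and dropped any failure, while the other callers each open-coded the stats bump. Every caller now funnels through one checked path, in the shape:

    error = xfs_btree_free_block(cur, cbp);     /* free + binval + stats */
    if (error) {
        XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
        return error;                           /* now propagated, not ignored */
    }
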
diff --git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h
index b14bbd6bb05f..8d4d8bce41bf 100644
--- a/fs/xfs/libxfs/xfs_da_format.h
+++ b/fs/xfs/libxfs/xfs_da_format.h
@@ -641,6 +641,22 @@ xfs_dir2_block_leaf_p(struct xfs_dir2_block_tail *btp)
  */
 #define XFS_ATTR_LEAF_MAPSIZE	3	/* how many freespace slots */
 
+/*
+ * Entries are packed toward the top as tight as possible.
+ */
+typedef struct xfs_attr_shortform {
+    struct xfs_attr_sf_hdr {    /* constant-structure header block */
+        __be16  totsize;        /* total bytes in shortform list */
+        __u8    count;          /* count of active entries */
+    } hdr;
+    struct xfs_attr_sf_entry {
+        __uint8_t namelen;      /* actual length of name (no NULL) */
+        __uint8_t valuelen;     /* actual length of value (no NULL) */
+        __uint8_t flags;        /* flags bits (see xfs_attr_leaf.h) */
+        __uint8_t nameval[1];   /* name & value bytes concatenated */
+    } list[1];                  /* variable sized array */
+} xfs_attr_shortform_t;
+
 typedef struct xfs_attr_leaf_map {	/* RLE map of free bytes */
     __be16	base;			/* base of free region */
     __be16	size;			/* length of free region */
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c
index c679f3c05b63..89c21d771e35 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.c
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
@@ -125,16 +125,8 @@ xfs_inobt_free_block(
     struct xfs_btree_cur    *cur,
     struct xfs_buf          *bp)
 {
-    xfs_fsblock_t           fsbno;
-    int                     error;
-
-    fsbno = XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(bp));
-    error = xfs_free_extent(cur->bc_tp, fsbno, 1);
-    if (error)
-        return error;
-
-    xfs_trans_binval(cur->bc_tp, bp);
-    return error;
+    return xfs_free_extent(cur->bc_tp,
+            XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(bp)), 1);
 }
 
 STATIC int
diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
index 0bf1c747439d..11faf7df14c8 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.c
+++ b/fs/xfs/libxfs/xfs_inode_fork.c
@@ -31,6 +31,7 @@
 #include "xfs_error.h"
 #include "xfs_trace.h"
 #include "xfs_attr_sf.h"
+#include "xfs_da_format.h"
 
 kmem_zone_t *xfs_ifork_zone;
 
diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h
index 03f90b99b8c8..d54a8018b079 100644
--- a/fs/xfs/libxfs/xfs_log_format.h
+++ b/fs/xfs/libxfs/xfs_log_format.h
@@ -496,6 +496,8 @@ enum xfs_blft {
     XFS_BLFT_ATTR_LEAF_BUF,
     XFS_BLFT_ATTR_RMT_BUF,
     XFS_BLFT_SB_BUF,
+    XFS_BLFT_RTBITMAP_BUF,
+    XFS_BLFT_RTSUMMARY_BUF,
     XFS_BLFT_MAX_BUF = (1 << XFS_BLFT_BITS),
 };
 
diff --git a/fs/xfs/libxfs/xfs_quota_defs.h b/fs/xfs/libxfs/xfs_quota_defs.h
index f51078f1e92a..8eed51275bb3 100644
--- a/fs/xfs/libxfs/xfs_quota_defs.h
+++ b/fs/xfs/libxfs/xfs_quota_defs.h
@@ -37,7 +37,7 @@ typedef __uint16_t xfs_qwarncnt_t;
 #define XFS_DQ_PROJ		0x0002		/* project quota */
 #define XFS_DQ_GROUP		0x0004		/* a group quota */
 #define XFS_DQ_DIRTY		0x0008		/* dquot is dirty */
-#define XFS_DQ_FREEING		0x0010		/* dquot is beeing torn down */
+#define XFS_DQ_FREEING		0x0010		/* dquot is being torn down */
 
 #define XFS_DQ_ALLTYPES		(XFS_DQ_USER|XFS_DQ_PROJ|XFS_DQ_GROUP)
 
@@ -116,6 +116,7 @@ typedef __uint16_t xfs_qwarncnt_t;
 #define XFS_QMOPT_DQREPAIR	0x0001000 /* repair dquot if damaged */
 #define XFS_QMOPT_GQUOTA	0x0002000 /* group dquot requested */
 #define XFS_QMOPT_ENOSPC	0x0004000 /* enospc instead of edquot (prj) */
+#define XFS_QMOPT_DQNEXT	0x0008000 /* return next dquot >= this ID */
 
 /*
  * flags to xfs_trans_mod_dquot to indicate which field needs to be
diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c
index acc71dd36a2b..951c044e24e4 100644
--- a/fs/xfs/libxfs/xfs_rtbitmap.c
+++ b/fs/xfs/libxfs/xfs_rtbitmap.c
@@ -42,6 +42,31 @@
  */
 
 /*
+ * Real time buffers need verifiers to avoid runtime warnings during IO.
+ * We don't have anything to verify, however, so these are just dummy
+ * operations.
+ */
+static void
+xfs_rtbuf_verify_read(
+    struct xfs_buf  *bp)
+{
+    return;
+}
+
+static void
+xfs_rtbuf_verify_write(
+    struct xfs_buf  *bp)
+{
+    return;
+}
+
+const struct xfs_buf_ops xfs_rtbuf_ops = {
+    .name = "rtbuf",
+    .verify_read = xfs_rtbuf_verify_read,
+    .verify_write = xfs_rtbuf_verify_write,
+};
+
+/*
  * Get a buffer for the bitmap or summary file block specified.
  * The buffer is returned read and locked.
  */
@@ -68,9 +93,12 @@ xfs_rtbuf_get(
     ASSERT(map.br_startblock != NULLFSBLOCK);
     error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
                                XFS_FSB_TO_DADDR(mp, map.br_startblock),
-                               mp->m_bsize, 0, &bp, NULL);
+                               mp->m_bsize, 0, &bp, &xfs_rtbuf_ops);
     if (error)
         return error;
+
+    xfs_trans_buf_set_type(tp, bp, issum ? XFS_BLFT_RTSUMMARY_BUF
+                                         : XFS_BLFT_RTBITMAP_BUF);
     *bpp = bp;
     return 0;
 }
diff --git a/fs/xfs/libxfs/xfs_sb.h b/fs/xfs/libxfs/xfs_sb.h
index b25bb9a343f3..961e6475a309 100644
--- a/fs/xfs/libxfs/xfs_sb.h
+++ b/fs/xfs/libxfs/xfs_sb.h
@@ -27,7 +27,6 @@ extern struct xfs_perag *xfs_perag_get_tag(struct xfs_mount *, xfs_agnumber_t,
 extern void	xfs_perag_put(struct xfs_perag *pag);
 extern int	xfs_initialize_perag_data(struct xfs_mount *, xfs_agnumber_t);
 
-extern void	xfs_sb_calc_crc(struct xfs_buf *bp);
 extern void	xfs_log_sb(struct xfs_trans *tp);
 extern int	xfs_sync_sb(struct xfs_mount *mp, bool wait);
 extern void	xfs_sb_mount_common(struct xfs_mount *mp, struct xfs_sb *sbp);
diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h
index 15c3ceb845b9..81ac870834da 100644
--- a/fs/xfs/libxfs/xfs_shared.h
+++ b/fs/xfs/libxfs/xfs_shared.h
@@ -53,6 +53,7 @@ extern const struct xfs_buf_ops xfs_dquot_buf_ra_ops;
 extern const struct xfs_buf_ops xfs_sb_buf_ops;
 extern const struct xfs_buf_ops xfs_sb_quiet_buf_ops;
 extern const struct xfs_buf_ops xfs_symlink_buf_ops;
+extern const struct xfs_buf_ops xfs_rtbuf_ops;
 
 /*
  * Transaction types. Used to distinguish types of buffers. These never reach
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 379c089fb051..14ac9822b303 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -36,6 +36,10 @@
 #include <linux/pagevec.h>
 #include <linux/writeback.h>
 
+/* flags for direct write completions */
+#define XFS_DIO_FLAG_UNWRITTEN	(1 << 0)
+#define XFS_DIO_FLAG_APPEND	(1 << 1)
+
 void
 xfs_count_page_state(
     struct page         *page,
@@ -214,10 +218,12 @@ xfs_end_io(
     struct xfs_inode *ip = XFS_I(ioend->io_inode);
     int             error = 0;
 
-    if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+    /*
+     * Set an error if the mount has shut down and proceed with end I/O
+     * processing so it can perform whatever cleanups are necessary.
+     */
+    if (XFS_FORCED_SHUTDOWN(ip->i_mount))
         ioend->io_error = -EIO;
-        goto done;
-    }
 
     /*
      * For unwritten extents we need to issue transactions to convert a
@@ -1238,27 +1244,8 @@ xfs_vm_releasepage(
 }
 
 /*
- * When we map a DIO buffer, we may need to attach an ioend that describes the
- * type of write IO we are doing. This passes to the completion function the
- * operations it needs to perform. If the mapping is for an overwrite wholly
- * within the EOF then we don't need an ioend and so we don't allocate one.
- * This avoids the unnecessary overhead of allocating and freeing ioends for
- * workloads that don't require transactions on IO completion.
- *
- * If we get multiple mappings in a single IO, we might be mapping different
- * types. But because the direct IO can only have a single private pointer, we
- * need to ensure that:
- *
- * a) i) the ioend spans the entire region of unwritten mappings; or
- *    ii) the ioend spans all the mappings that cross or are beyond EOF; and
- * b) if it contains unwritten extents, it is *permanently* marked as such
- *
- * We could do this by chaining ioends like buffered IO does, but we only
- * actually get one IO completion callback from the direct IO, and that spans
- * the entire IO regardless of how many mappings and IOs are needed to complete
- * the DIO. There is only going to be one reference to the ioend and its life
- * cycle is constrained by the DIO completion code. hence we don't need
- * reference counting here.
+ * When we map a DIO buffer, we may need to pass flags to
+ * xfs_end_io_direct_write to tell it what kind of write IO we are doing.
  *
  * Note that for DIO, an IO to the highest supported file block offset (i.e.
  * 2^63 - 1FSB bytes) will result in the offset + count overflowing a signed 64
@@ -1266,68 +1253,26 @@ xfs_vm_releasepage(
  * extending the file size. We won't know for sure until IO completion is run
  * and the actual max write offset is communicated to the IO completion
  * routine.
- *
- * For DAX page faults, we are preparing to never see unwritten extents here,
- * nor should we ever extend the inode size. Hence we will soon have nothing to
- * do here for this case, ensuring we don't have to provide an IO completion
- * callback to free an ioend that we don't actually need for a fault into the
- * page at offset (2^63 - 1FSB) bytes.
  */
-
 static void
 xfs_map_direct(
     struct inode            *inode,
     struct buffer_head      *bh_result,
     struct xfs_bmbt_irec    *imap,
-    xfs_off_t               offset,
-    bool                    dax_fault)
+    xfs_off_t               offset)
 {
-    struct xfs_ioend        *ioend;
+    uintptr_t               *flags = (uintptr_t *)&bh_result->b_private;
     xfs_off_t               size = bh_result->b_size;
-    int                     type;
 
-    if (ISUNWRITTEN(imap))
-        type = XFS_IO_UNWRITTEN;
-    else
-        type = XFS_IO_OVERWRITE;
-
-    trace_xfs_gbmap_direct(XFS_I(inode), offset, size, type, imap);
-
-    if (dax_fault) {
-        ASSERT(type == XFS_IO_OVERWRITE);
-        trace_xfs_gbmap_direct_none(XFS_I(inode), offset, size, type,
-                                    imap);
-        return;
-    }
-
-    if (bh_result->b_private) {
-        ioend = bh_result->b_private;
-        ASSERT(ioend->io_size > 0);
-        ASSERT(offset >= ioend->io_offset);
-        if (offset + size > ioend->io_offset + ioend->io_size)
-            ioend->io_size = offset - ioend->io_offset + size;
-
-        if (type == XFS_IO_UNWRITTEN && type != ioend->io_type)
-            ioend->io_type = XFS_IO_UNWRITTEN;
-
-        trace_xfs_gbmap_direct_update(XFS_I(inode), ioend->io_offset,
-                                      ioend->io_size, ioend->io_type,
-                                      imap);
-    } else if (type == XFS_IO_UNWRITTEN ||
-               offset + size > i_size_read(inode) ||
-               offset + size < 0) {
-        ioend = xfs_alloc_ioend(inode, type);
-        ioend->io_offset = offset;
-        ioend->io_size = size;
+    trace_xfs_get_blocks_map_direct(XFS_I(inode), offset, size,
+        ISUNWRITTEN(imap) ? XFS_IO_UNWRITTEN : XFS_IO_OVERWRITE, imap);
 
-        bh_result->b_private = ioend;
+    if (ISUNWRITTEN(imap)) {
+        *flags |= XFS_DIO_FLAG_UNWRITTEN;
+        set_buffer_defer_completion(bh_result);
+    } else if (offset + size > i_size_read(inode) || offset + size < 0) {
+        *flags |= XFS_DIO_FLAG_APPEND;
         set_buffer_defer_completion(bh_result);
-
-        trace_xfs_gbmap_direct_new(XFS_I(inode), offset, size, type,
-                                   imap);
-    } else {
-        trace_xfs_gbmap_direct_none(XFS_I(inode), offset, size, type,
-                                    imap);
     }
 }
 
@@ -1498,9 +1443,12 @@ __xfs_get_blocks(
         if (ISUNWRITTEN(&imap))
             set_buffer_unwritten(bh_result);
         /* direct IO needs special help */
-        if (create && direct)
-            xfs_map_direct(inode, bh_result, &imap, offset,
-                           dax_fault);
+        if (create && direct) {
+            if (dax_fault)
+                ASSERT(!ISUNWRITTEN(&imap));
+            else
+                xfs_map_direct(inode, bh_result, &imap, offset);
+        }
     }
 
     /*
@@ -1570,42 +1518,50 @@ xfs_get_blocks_dax_fault(
     return __xfs_get_blocks(inode, iblock, bh_result, create, true, true);
 }
 
-static void
-__xfs_end_io_direct_write(
-    struct inode        *inode,
-    struct xfs_ioend    *ioend,
+/*
+ * Complete a direct I/O write request.
+ *
+ * xfs_map_direct passes us some flags in the private data to tell us what to
+ * do. If no flags are set, then the write IO is an overwrite wholly within
+ * the existing allocated file size and so there is nothing for us to do.
+ *
+ * Note that in this case the completion can be called in interrupt context,
+ * whereas if we have flags set we will always be called in task context
+ * (i.e. from a workqueue).
+ */
+STATIC int
+xfs_end_io_direct_write(
+    struct kiocb        *iocb,
     loff_t              offset,
-    ssize_t             size)
+    ssize_t             size,
+    void                *private)
 {
-    struct xfs_mount    *mp = XFS_I(inode)->i_mount;
+    struct inode        *inode = file_inode(iocb->ki_filp);
+    struct xfs_inode    *ip = XFS_I(inode);
+    struct xfs_mount    *mp = ip->i_mount;
+    uintptr_t           flags = (uintptr_t)private;
+    int                 error = 0;
 
-    if (XFS_FORCED_SHUTDOWN(mp) || ioend->io_error)
-        goto out_end_io;
+    trace_xfs_end_io_direct_write(ip, offset, size);
 
-    /*
-     * dio completion end_io functions are only called on writes if more
-     * than 0 bytes was written.
-     */
-    ASSERT(size > 0);
+    if (XFS_FORCED_SHUTDOWN(mp))
+        return -EIO;
 
-    /*
-     * The ioend only maps whole blocks, while the IO may be sector aligned.
-     * Hence the ioend offset/size may not match the IO offset/size exactly.
-     * Because we don't map overwrites within EOF into the ioend, the offset
-     * may not match, but only if the endio spans EOF. Either way, write
-     * the IO sizes into the ioend so that completion processing does the
-     * right thing.
-     */
-    ASSERT(offset + size <= ioend->io_offset + ioend->io_size);
-    ioend->io_size = size;
-    ioend->io_offset = offset;
+    if (size <= 0)
+        return size;
 
     /*
-     * The ioend tells us whether we are doing unwritten extent conversion
+     * The flags tell us whether we are doing unwritten extent conversions
      * or an append transaction that updates the on-disk file size. These
      * cases are the only cases where we should *potentially* be needing
      * to update the VFS inode size.
-     *
+     */
+    if (flags == 0) {
+        ASSERT(offset + size <= i_size_read(inode));
+        return 0;
+    }
+
+    /*
      * We need to update the in-core inode size here so that we don't end up
      * with the on-disk inode size being outside the in-core inode size. We
      * have no other method of updating EOF for AIO, so always do it here
@@ -1616,91 +1572,56 @@ __xfs_end_io_direct_write(
      * here can result in EOF moving backwards and Bad Things Happen when
      * that occurs.
      */
-    spin_lock(&XFS_I(inode)->i_flags_lock);
+    spin_lock(&ip->i_flags_lock);
     if (offset + size > i_size_read(inode))
         i_size_write(inode, offset + size);
-    spin_unlock(&XFS_I(inode)->i_flags_lock);
+    spin_unlock(&ip->i_flags_lock);
 
-    /*
-     * If we are doing an append IO that needs to update the EOF on disk,
-     * do the transaction reserve now so we can use common end io
-     * processing. Stashing the error (if there is one) in the ioend will
-     * result in the ioend processing passing on the error if it is
-     * possible as we can't return it from here.
-     */
-    if (ioend->io_type == XFS_IO_OVERWRITE)
-        ioend->io_error = xfs_setfilesize_trans_alloc(ioend);
+    if (flags & XFS_DIO_FLAG_UNWRITTEN) {
+        trace_xfs_end_io_direct_write_unwritten(ip, offset, size);
 
-out_end_io:
-    xfs_end_io(&ioend->io_work);
-    return;
-}
+        error = xfs_iomap_write_unwritten(ip, offset, size);
+    } else if (flags & XFS_DIO_FLAG_APPEND) {
+        struct xfs_trans *tp;
 
-/*
- * Complete a direct I/O write request.
- *
- * The ioend structure is passed from __xfs_get_blocks() to tell us what to do.
- * If no ioend exists (i.e. @private == NULL) then the write IO is an overwrite
- * wholly within the EOF and so there is nothing for us to do. Note that in this
- * case the completion can be called in interrupt context, whereas if we have an
- * ioend we will always be called in task context (i.e. from a workqueue).
- */
-STATIC void
-xfs_end_io_direct_write(
-    struct kiocb        *iocb,
-    loff_t              offset,
-    ssize_t             size,
-    void                *private)
-{
-    struct inode        *inode = file_inode(iocb->ki_filp);
-    struct xfs_ioend    *ioend = private;
+        trace_xfs_end_io_direct_write_append(ip, offset, size);
 
-    trace_xfs_gbmap_direct_endio(XFS_I(inode), offset, size,
-                                 ioend ? ioend->io_type : 0, NULL);
-
-    if (!ioend) {
-        ASSERT(offset + size <= i_size_read(inode));
-        return;
+        tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
+        error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0);
+        if (error) {
+            xfs_trans_cancel(tp);
+            return error;
+        }
+        error = xfs_setfilesize(ip, tp, offset, size);
     }
 
-    __xfs_end_io_direct_write(inode, ioend, offset, size);
+    return error;
 }
 
-static inline ssize_t
-xfs_vm_do_dio(
-    struct inode        *inode,
+STATIC ssize_t
+xfs_vm_direct_IO(
     struct kiocb        *iocb,
     struct iov_iter     *iter,
-    loff_t              offset,
-    void                (*endio)(struct kiocb   *iocb,
-                                 loff_t         offset,
-                                 ssize_t        size,
-                                 void           *private),
-    int                 flags)
+    loff_t              offset)
 {
+    struct inode        *inode = iocb->ki_filp->f_mapping->host;
+    dio_iodone_t        *endio = NULL;
+    int                 flags = 0;
     struct block_device *bdev;
 
-    if (IS_DAX(inode))
+    if (iov_iter_rw(iter) == WRITE) {
+        endio = xfs_end_io_direct_write;
+        flags = DIO_ASYNC_EXTEND;
+    }
+
+    if (IS_DAX(inode)) {
         return dax_do_io(iocb, inode, iter, offset,
                          xfs_get_blocks_direct, endio, 0);
+    }
 
     bdev = xfs_find_bdev_for_inode(inode);
     return __blockdev_direct_IO(iocb, inode, bdev, iter, offset,
             xfs_get_blocks_direct, endio, NULL, flags);
-}
-
-STATIC ssize_t
-xfs_vm_direct_IO(
-    struct kiocb        *iocb,
-    struct iov_iter     *iter,
-    loff_t              offset)
-{
-    struct inode        *inode = iocb->ki_filp->f_mapping->host;
-
-    if (iov_iter_rw(iter) == WRITE)
-        return xfs_vm_do_dio(inode, iocb, iter, offset,
-                             xfs_end_io_direct_write, DIO_ASYNC_EXTEND);
-    return xfs_vm_do_dio(inode, iocb, iter, offset, NULL, 0);
 }
 
 /*
@@ -1783,14 +1704,22 @@ xfs_vm_write_failed(
         if (block_start >= to)
             break;
 
-        if (!buffer_delay(bh))
+        /*
+         * Process delalloc and unwritten buffers beyond EOF. We can
+         * encounter unwritten buffers in the event that a file has
+         * post-EOF unwritten extents and an extending write happens to
+         * fail (e.g., an unaligned write that also involves a delalloc
+         * to the same page).
+         */
+        if (!buffer_delay(bh) && !buffer_unwritten(bh))
             continue;
 
         if (!buffer_new(bh) && block_offset < i_size_read(inode))
             continue;
 
-        xfs_vm_kill_delalloc_range(inode, block_offset,
-                                   block_offset + bh->b_size);
+        if (buffer_delay(bh))
+            xfs_vm_kill_delalloc_range(inode, block_offset,
+                                       block_offset + bh->b_size);
 
         /*
          * This buffer does not contain data anymore. make sure anyone
@@ -1801,6 +1730,7 @@ xfs_vm_write_failed(
         clear_buffer_mapped(bh);
         clear_buffer_new(bh);
         clear_buffer_dirty(bh);
+        clear_buffer_unwritten(bh);
     }
 
 }
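
The xfs_aops.c rewrite replaces per-I/O ioend allocations with two flag bits smuggled into the buffer_head's b_private pointer: a direct write completion carries exactly one private pointer, and for the pure-overwrite case nothing needs to travel at all. The trick in isolation (a self-contained illustration, not kernel code):

    #include <stdint.h>
    #include <stdio.h>

    #define FLAG_UNWRITTEN  (1 << 0)    /* mirrors XFS_DIO_FLAG_UNWRITTEN */
    #define FLAG_APPEND     (1 << 1)    /* mirrors XFS_DIO_FLAG_APPEND */

    int main(void)
    {
        void *b_private = NULL;                 /* the single private slot */
        uintptr_t *flags = (uintptr_t *)&b_private;

        *flags |= FLAG_APPEND;                  /* set at mapping time */

        uintptr_t seen = (uintptr_t)b_private;  /* decoded at completion */
        if (seen == 0)
            printf("pure overwrite: nothing to do\n");
        else if (seen & FLAG_UNWRITTEN)
            printf("convert unwritten extents\n");
        else if (seen & FLAG_APPEND)
            printf("update on-disk file size\n");
        return 0;
    }

This works because the pointer is never dereferenced when used this way; it only ferries the two low bits from xfs_map_direct() to xfs_end_io_direct_write().
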
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 708775613e55..fd7f51c39b3f 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -202,10 +202,12 @@ xfs_bmap_rtalloc(
     ralen = MAXEXTLEN / mp->m_sb.sb_rextsize;
 
     /*
-     * Lock out other modifications to the RT bitmap inode.
+     * Lock out modifications to both the RT bitmap and summary inodes
      */
     xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL);
     xfs_trans_ijoin(ap->tp, mp->m_rbmip, XFS_ILOCK_EXCL);
+    xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL);
+    xfs_trans_ijoin(ap->tp, mp->m_rsumip, XFS_ILOCK_EXCL);
 
     /*
      * If it's an allocation to an empty file at offset 0,
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 9c44d38dcd1f..316b2a1bdba5 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -92,26 +92,28 @@ xfs_qm_adjust_dqlimits(
 {
     struct xfs_quotainfo    *q = mp->m_quotainfo;
     struct xfs_disk_dquot   *d = &dq->q_core;
+    struct xfs_def_quota    *defq;
     int                     prealloc = 0;
 
     ASSERT(d->d_id);
+    defq = xfs_get_defquota(dq, q);
 
-    if (q->qi_bsoftlimit && !d->d_blk_softlimit) {
-        d->d_blk_softlimit = cpu_to_be64(q->qi_bsoftlimit);
+    if (defq->bsoftlimit && !d->d_blk_softlimit) {
+        d->d_blk_softlimit = cpu_to_be64(defq->bsoftlimit);
         prealloc = 1;
     }
-    if (q->qi_bhardlimit && !d->d_blk_hardlimit) {
-        d->d_blk_hardlimit = cpu_to_be64(q->qi_bhardlimit);
+    if (defq->bhardlimit && !d->d_blk_hardlimit) {
+        d->d_blk_hardlimit = cpu_to_be64(defq->bhardlimit);
         prealloc = 1;
     }
-    if (q->qi_isoftlimit && !d->d_ino_softlimit)
-        d->d_ino_softlimit = cpu_to_be64(q->qi_isoftlimit);
-    if (q->qi_ihardlimit && !d->d_ino_hardlimit)
-        d->d_ino_hardlimit = cpu_to_be64(q->qi_ihardlimit);
-    if (q->qi_rtbsoftlimit && !d->d_rtb_softlimit)
-        d->d_rtb_softlimit = cpu_to_be64(q->qi_rtbsoftlimit);
-    if (q->qi_rtbhardlimit && !d->d_rtb_hardlimit)
-        d->d_rtb_hardlimit = cpu_to_be64(q->qi_rtbhardlimit);
+    if (defq->isoftlimit && !d->d_ino_softlimit)
+        d->d_ino_softlimit = cpu_to_be64(defq->isoftlimit);
+    if (defq->ihardlimit && !d->d_ino_hardlimit)
+        d->d_ino_hardlimit = cpu_to_be64(defq->ihardlimit);
+    if (defq->rtbsoftlimit && !d->d_rtb_softlimit)
+        d->d_rtb_softlimit = cpu_to_be64(defq->rtbsoftlimit);
+    if (defq->rtbhardlimit && !d->d_rtb_hardlimit)
+        d->d_rtb_hardlimit = cpu_to_be64(defq->rtbhardlimit);
 
     if (prealloc)
         xfs_dquot_set_prealloc_limits(dq);
@@ -232,7 +234,8 @@ xfs_qm_init_dquot_blk(
 {
     struct xfs_quotainfo    *q = mp->m_quotainfo;
     xfs_dqblk_t             *d;
-    int                     curid, i;
+    xfs_dqid_t              curid;
+    int                     i;
 
     ASSERT(tp);
     ASSERT(xfs_buf_islocked(bp));
@@ -243,7 +246,6 @@ xfs_qm_init_dquot_blk(
      * ID of the first dquot in the block - id's are zero based.
      */
     curid = id - (id % q->qi_dqperchunk);
-    ASSERT(curid >= 0);
     memset(d, 0, BBTOB(q->qi_dqchunklen));
     for (i = 0; i < q->qi_dqperchunk; i++, d++, curid++) {
         d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC);
@@ -464,12 +466,13 @@ xfs_qm_dqtobp(
     struct xfs_bmbt_irec    map;
     int                     nmaps = 1, error;
     struct xfs_buf          *bp;
-    struct xfs_inode        *quotip = xfs_dq_to_quota_inode(dqp);
+    struct xfs_inode        *quotip;
     struct xfs_mount        *mp = dqp->q_mount;
     xfs_dqid_t              id = be32_to_cpu(dqp->q_core.d_id);
     struct xfs_trans        *tp = (tpp ? *tpp : NULL);
     uint                    lock_mode;
 
+    quotip = xfs_quota_inode(dqp->q_mount, dqp->dq_flags);
     dqp->q_fileoffset = (xfs_fileoff_t)id / mp->m_quotainfo->qi_dqperchunk;
 
     lock_mode = xfs_ilock_data_map_shared(quotip);
@@ -685,6 +688,56 @@ error0:
 }
 
 /*
+ * Advance to the next id in the current chunk, or if at the
+ * end of the chunk, skip ahead to first id in next allocated chunk
+ * using the SEEK_DATA interface.
+ */
+int
+xfs_dq_get_next_id(
+    xfs_mount_t         *mp,
+    uint                type,
+    xfs_dqid_t          *id,
+    loff_t              eof)
+{
+    struct xfs_inode    *quotip;
+    xfs_fsblock_t       start;
+    loff_t              offset;
+    uint                lock;
+    xfs_dqid_t          next_id;
+    int                 error = 0;
+
+    /* Simple advance */
+    next_id = *id + 1;
+
+    /* If new ID is within the current chunk, advancing it sufficed */
+    if (next_id % mp->m_quotainfo->qi_dqperchunk) {
+        *id = next_id;
+        return 0;
+    }
+
+    /* Nope, next_id is now past the current chunk, so find the next one */
+    start = (xfs_fsblock_t)next_id / mp->m_quotainfo->qi_dqperchunk;
+
+    quotip = xfs_quota_inode(mp, type);
+    lock = xfs_ilock_data_map_shared(quotip);
+
+    offset = __xfs_seek_hole_data(VFS_I(quotip), XFS_FSB_TO_B(mp, start),
+                                  eof, SEEK_DATA);
+    if (offset < 0)
+        error = offset;
+
+    xfs_iunlock(quotip, lock);
+
+    /* -ENXIO is essentially "no more data" */
+    if (error)
+        return (error == -ENXIO ? -ENOENT: error);
+
+    /* Convert next data offset back to a quota id */
+    *id = XFS_B_TO_FSB(mp, offset) * mp->m_quotainfo->qi_dqperchunk;
+    return 0;
+}
+
+/*
  * Given the file system, inode OR id, and type (UDQUOT/GDQUOT), return a
  * a locked dquot, doing an allocation (if requested) as needed.
  * When both an inode and an id are given, the inode's id takes precedence.
@@ -704,6 +757,7 @@ xfs_qm_dqget(
     struct xfs_quotainfo    *qi = mp->m_quotainfo;
     struct radix_tree_root  *tree = xfs_dquot_tree(qi, type);
     struct xfs_dquot        *dqp;
+    loff_t                  eof = 0;
     int                     error;
 
     ASSERT(XFS_IS_QUOTA_RUNNING(mp));
@@ -731,6 +785,21 @@ xfs_qm_dqget(
     }
 #endif
 
+    /* Get the end of the quota file if we need it */
+    if (flags & XFS_QMOPT_DQNEXT) {
+        struct xfs_inode    *quotip;
+        xfs_fileoff_t       last;
+        uint                lock_mode;
+
+        quotip = xfs_quota_inode(mp, type);
+        lock_mode = xfs_ilock_data_map_shared(quotip);
+        error = xfs_bmap_last_offset(quotip, &last, XFS_DATA_FORK);
+        xfs_iunlock(quotip, lock_mode);
+        if (error)
+            return error;
+        eof = XFS_FSB_TO_B(mp, last);
+    }
+
 restart:
     mutex_lock(&qi->qi_tree_lock);
     dqp = radix_tree_lookup(tree, id);
@@ -744,6 +813,18 @@ restart:
         goto restart;
     }
 
+    /* uninit / unused quota found in radix tree, keep looking */
+    if (flags & XFS_QMOPT_DQNEXT) {
+        if (XFS_IS_DQUOT_UNINITIALIZED(dqp)) {
+            xfs_dqunlock(dqp);
+            mutex_unlock(&qi->qi_tree_lock);
+            error = xfs_dq_get_next_id(mp, type, &id, eof);
+            if (error)
+                return error;
+            goto restart;
+        }
+    }
+
     dqp->q_nrefs++;
     mutex_unlock(&qi->qi_tree_lock);
 
@@ -770,6 +851,13 @@ restart:
     if (ip)
         xfs_ilock(ip, XFS_ILOCK_EXCL);
 
+    /* If we are asked to find next active id, keep looking */
+    if (error == -ENOENT && (flags & XFS_QMOPT_DQNEXT)) {
+        error = xfs_dq_get_next_id(mp, type, &id, eof);
+        if (!error)
+            goto restart;
+    }
+
     if (error)
         return error;
 
@@ -820,6 +908,17 @@ restart:
     qi->qi_dquots++;
     mutex_unlock(&qi->qi_tree_lock);
 
+    /* If we are asked to find next active id, keep looking */
+    if (flags & XFS_QMOPT_DQNEXT) {
+        if (XFS_IS_DQUOT_UNINITIALIZED(dqp)) {
+            xfs_qm_dqput(dqp);
+            error = xfs_dq_get_next_id(mp, type, &id, eof);
+            if (error)
+                return error;
+            goto restart;
+        }
+    }
+
 dqret:
     ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL));
     trace_xfs_dqget_miss(dqp);
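
xfs_dq_get_next_id() leans on the quota file's linear layout: dquots are packed qi_dqperchunk per filesystem block, so quota id N always lives in block N / qi_dqperchunk, and a hole in the file means a run of ids with no dquots at all. A worked example of the advance, assuming a hypothetical 42 dquots per block:

    /* current id 83 sits in chunk 1 (ids 42..83) */
    next_id = 83 + 1;       /* 84: 84 % 42 == 0, chunk boundary crossed */
    start   = 84 / 42;      /* = fs block 2 of the quota file */
    /* SEEK_DATA from block 2's byte offset; suppose data starts in block 5 */
    *id = 5 * 42;           /* = 210, first id of that allocated chunk */

Any next_id with a nonzero remainder stays inside an allocated chunk and returns immediately; only a chunk crossing pays for the SEEK_DATA lookup, and -ENXIO from the seek is mapped to -ENOENT ("no more quotas").
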
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index f7333fbba5c2..ac0fd32de31e 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1337,31 +1337,31 @@ out:
     return found;
 }
 
-STATIC loff_t
-xfs_seek_hole_data(
-    struct file         *file,
+/*
+ * caller must lock inode with xfs_ilock_data_map_shared,
+ * can we craft an appropriate ASSERT?
+ *
+ * end is because the VFS-level lseek interface is defined such that any
+ * offset past i_size shall return -ENXIO, but we use this for quota code
+ * which does not maintain i_size, and we want to SEEK_DATA past i_size.
+ */
+loff_t
+__xfs_seek_hole_data(
+    struct inode        *inode,
     loff_t              start,
+    loff_t              end,
     int                 whence)
 {
-    struct inode        *inode = file->f_mapping->host;
     struct xfs_inode    *ip = XFS_I(inode);
     struct xfs_mount    *mp = ip->i_mount;
     loff_t              uninitialized_var(offset);
-    xfs_fsize_t         isize;
     xfs_fileoff_t       fsbno;
-    xfs_filblks_t       end;
-    uint                lock;
+    xfs_filblks_t       lastbno;
     int                 error;
 
-    if (XFS_FORCED_SHUTDOWN(mp))
-        return -EIO;
-
-    lock = xfs_ilock_data_map_shared(ip);
-
-    isize = i_size_read(inode);
-    if (start >= isize) {
+    if (start >= end) {
         error = -ENXIO;
-        goto out_unlock;
+        goto out_error;
     }
 
     /*
@@ -1369,22 +1369,22 @@ xfs_seek_hole_data(
      * by fsbno to the end block of the file.
      */
     fsbno = XFS_B_TO_FSBT(mp, start);
-    end = XFS_B_TO_FSB(mp, isize);
+    lastbno = XFS_B_TO_FSB(mp, end);
 
     for (;;) {
         struct xfs_bmbt_irec    map[2];
         int                     nmap = 2;
         unsigned int            i;
 
-        error = xfs_bmapi_read(ip, fsbno, end - fsbno, map, &nmap,
+        error = xfs_bmapi_read(ip, fsbno, lastbno - fsbno, map, &nmap,
                                XFS_BMAPI_ENTIRE);
         if (error)
-            goto out_unlock;
+            goto out_error;
 
         /* No extents at given offset, must be beyond EOF */
         if (nmap == 0) {
             error = -ENXIO;
-            goto out_unlock;
+            goto out_error;
         }
 
         for (i = 0; i < nmap; i++) {
@@ -1426,7 +1426,7 @@ xfs_seek_hole_data(
              * hole at the end of any file).
              */
             if (whence == SEEK_HOLE) {
-                offset = isize;
+                offset = end;
                 break;
             }
             /*
@@ -1434,7 +1434,7 @@ xfs_seek_hole_data(
              */
             ASSERT(whence == SEEK_DATA);
             error = -ENXIO;
-            goto out_unlock;
+            goto out_error;
         }
 
         ASSERT(i > 1);
@@ -1445,14 +1445,14 @@ xfs_seek_hole_data(
          */
         fsbno = map[i - 1].br_startoff + map[i - 1].br_blockcount;
         start = XFS_FSB_TO_B(mp, fsbno);
-        if (start >= isize) {
+        if (start >= end) {
             if (whence == SEEK_HOLE) {
-                offset = isize;
+                offset = end;
                 break;
             }
             ASSERT(whence == SEEK_DATA);
             error = -ENXIO;
-            goto out_unlock;
+            goto out_error;
         }
     }
 
@@ -1464,7 +1464,39 @@ out:
1464 * situation in particular. 1464 * situation in particular.
1465 */ 1465 */
1466 if (whence == SEEK_HOLE) 1466 if (whence == SEEK_HOLE)
1467 offset = min_t(loff_t, offset, isize); 1467 offset = min_t(loff_t, offset, end);
1468
1469 return offset;
1470
1471out_error:
1472 return error;
1473}
1474
1475STATIC loff_t
1476xfs_seek_hole_data(
1477 struct file *file,
1478 loff_t start,
1479 int whence)
1480{
1481 struct inode *inode = file->f_mapping->host;
1482 struct xfs_inode *ip = XFS_I(inode);
1483 struct xfs_mount *mp = ip->i_mount;
1484 uint lock;
1485 loff_t offset, end;
1486 int error = 0;
1487
1488 if (XFS_FORCED_SHUTDOWN(mp))
1489 return -EIO;
1490
1491 lock = xfs_ilock_data_map_shared(ip);
1492
1493 end = i_size_read(inode);
1494 offset = __xfs_seek_hole_data(inode, start, end, whence);
1495 if (offset < 0) {
1496 error = offset;
1497 goto out_unlock;
1498 }
1499
1468 offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes); 1500 offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
1469 1501
1470out_unlock: 1502out_unlock:
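
The split above leaves __xfs_seek_hole_data() with no dependency on a struct file or on i_size, so a caller that takes the ILOCK itself can scan an inode that keeps no meaningful i_size. A minimal caller sketch, assuming only the locking rule stated in the comment above (sketch_next_data is a hypothetical name, not part of this patch):

static loff_t
sketch_next_data(
	struct xfs_inode	*ip,
	loff_t			start,
	loff_t			end)
{
	uint			lock;
	loff_t			offset;

	lock = xfs_ilock_data_map_shared(ip);	/* locking rule from the comment */
	offset = __xfs_seek_hole_data(VFS_I(ip), start, end, SEEK_DATA);
	xfs_iunlock(ip, lock);

	return offset;		/* negative return is an error, e.g. -ENXIO */
}
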
diff --git a/fs/xfs/xfs_fsops.h b/fs/xfs/xfs_fsops.h
index 1b6a98b66886..f32713f14f9a 100644
--- a/fs/xfs/xfs_fsops.h
+++ b/fs/xfs/xfs_fsops.h
@@ -25,6 +25,5 @@ extern int xfs_fs_counts(xfs_mount_t *mp, xfs_fsop_counts_t *cnt);
25extern int xfs_reserve_blocks(xfs_mount_t *mp, __uint64_t *inval, 25extern int xfs_reserve_blocks(xfs_mount_t *mp, __uint64_t *inval,
26 xfs_fsop_resblks_t *outval); 26 xfs_fsop_resblks_t *outval);
27extern int xfs_fs_goingdown(xfs_mount_t *mp, __uint32_t inflags); 27extern int xfs_fs_goingdown(xfs_mount_t *mp, __uint32_t inflags);
28extern int xfs_fs_log_dummy(struct xfs_mount *mp);
29 28
30#endif /* __XFS_FSOPS_H__ */ 29#endif /* __XFS_FSOPS_H__ */
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index d6277494e606..43e1d51b15eb 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -435,6 +435,8 @@ int xfs_update_prealloc_flags(struct xfs_inode *ip,
435int xfs_zero_eof(struct xfs_inode *ip, xfs_off_t offset, 435int xfs_zero_eof(struct xfs_inode *ip, xfs_off_t offset,
436 xfs_fsize_t isize, bool *did_zeroing); 436 xfs_fsize_t isize, bool *did_zeroing);
437int xfs_iozero(struct xfs_inode *ip, loff_t pos, size_t count); 437int xfs_iozero(struct xfs_inode *ip, loff_t pos, size_t count);
438loff_t __xfs_seek_hole_data(struct inode *inode, loff_t start,
439 loff_t eof, int whence);
438 440
439 441
440/* from xfs_iops.c */ 442/* from xfs_iops.c */
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index bd6f23b952a5..e776594889c3 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1109,27 +1109,10 @@ xlog_verify_head(
1109 bool tmp_wrapped; 1109 bool tmp_wrapped;
1110 1110
1111 /* 1111 /*
1112 * Search backwards through the log looking for the log record header 1112 * Check the head of the log for torn writes. Search backwards from the
1113 * block. This wraps all the way back around to the head so something is 1113 * head until we hit the tail or the maximum number of log record I/Os
1114 * seriously wrong if we can't find it. 1114 * that could have been in flight at one time. Use a temporary buffer so
1115 */ 1115 * we don't trash the rhead/bp pointers from the caller.
1116 found = xlog_rseek_logrec_hdr(log, *head_blk, *head_blk, 1, bp, rhead_blk,
1117 rhead, wrapped);
1118 if (found < 0)
1119 return found;
1120 if (!found) {
1121 xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__);
1122 return -EIO;
1123 }
1124
1125 *tail_blk = BLOCK_LSN(be64_to_cpu((*rhead)->h_tail_lsn));
1126
1127 /*
1128 * Now that we have a tail block, check the head of the log for torn
1129 * writes. Search again until we hit the tail or the maximum number of
1130 * log record I/Os that could have been in flight at one time. Use a
1131 * temporary buffer so we don't trash the rhead/bp pointer from the
1132 * call above.
1133 */ 1116 */
1134 tmp_bp = xlog_get_bp(log, 1); 1117 tmp_bp = xlog_get_bp(log, 1);
1135 if (!tmp_bp) 1118 if (!tmp_bp)
@@ -1216,6 +1199,115 @@ xlog_verify_head(
1216} 1199}
1217 1200
1218/* 1201/*
1202 * Check whether the head of the log points to an unmount record. In other
1203 * words, determine whether the log is clean. If so, update the in-core state
1204 * appropriately.
1205 */
1206static int
1207xlog_check_unmount_rec(
1208 struct xlog *log,
1209 xfs_daddr_t *head_blk,
1210 xfs_daddr_t *tail_blk,
1211 struct xlog_rec_header *rhead,
1212 xfs_daddr_t rhead_blk,
1213 struct xfs_buf *bp,
1214 bool *clean)
1215{
1216 struct xlog_op_header *op_head;
1217 xfs_daddr_t umount_data_blk;
1218 xfs_daddr_t after_umount_blk;
1219 int hblks;
1220 int error;
1221 char *offset;
1222
1223 *clean = false;
1224
1225 /*
1226 * Look for unmount record. If we find it, then we know there was a
1227 * clean unmount. Since the record header could be the last block in the
1228 * physical log, we convert to a log block before comparing to the head_blk.
1229 *
1230 * Save the current tail lsn to pass to xlog_clear_stale_blocks()
1231 * below. We won't want to clear the unmount record if there is one, so
1232 * we pass the lsn of the unmount record rather than the block after it.
1233 */
1234 if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
1235 int h_size = be32_to_cpu(rhead->h_size);
1236 int h_version = be32_to_cpu(rhead->h_version);
1237
1238 if ((h_version & XLOG_VERSION_2) &&
1239 (h_size > XLOG_HEADER_CYCLE_SIZE)) {
1240 hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
1241 if (h_size % XLOG_HEADER_CYCLE_SIZE)
1242 hblks++;
1243 } else {
1244 hblks = 1;
1245 }
1246 } else {
1247 hblks = 1;
1248 }
1249 after_umount_blk = rhead_blk + hblks + BTOBB(be32_to_cpu(rhead->h_len));
1250 after_umount_blk = do_mod(after_umount_blk, log->l_logBBsize);
1251 if (*head_blk == after_umount_blk &&
1252 be32_to_cpu(rhead->h_num_logops) == 1) {
1253 umount_data_blk = rhead_blk + hblks;
1254 umount_data_blk = do_mod(umount_data_blk, log->l_logBBsize);
1255 error = xlog_bread(log, umount_data_blk, 1, bp, &offset);
1256 if (error)
1257 return error;
1258
1259 op_head = (struct xlog_op_header *)offset;
1260 if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) {
1261 /*
1262 * Set tail and last sync so that newly written log
1263 * records will point recovery to after the current
1264 * unmount record.
1265 */
1266 xlog_assign_atomic_lsn(&log->l_tail_lsn,
1267 log->l_curr_cycle, after_umount_blk);
1268 xlog_assign_atomic_lsn(&log->l_last_sync_lsn,
1269 log->l_curr_cycle, after_umount_blk);
1270 *tail_blk = after_umount_blk;
1271
1272 *clean = true;
1273 }
1274 }
1275
1276 return 0;
1277}
1278
1279static void
1280xlog_set_state(
1281 struct xlog *log,
1282 xfs_daddr_t head_blk,
1283 struct xlog_rec_header *rhead,
1284 xfs_daddr_t rhead_blk,
1285 bool bump_cycle)
1286{
1287 /*
1288 * Reset log values according to the state of the log when we
1289 * crashed. In the case where head_blk == 0, we bump curr_cycle
1290 * one because the next write starts a new cycle rather than
1291 * continuing the cycle of the last good log record. At this
1292 * point we have guaranteed that all partial log records have been
1293 * accounted for. Therefore, we know that the last good log record
1294 * written was complete and ended exactly on the end boundary
1295 * of the physical log.
1296 */
1297 log->l_prev_block = rhead_blk;
1298 log->l_curr_block = (int)head_blk;
1299 log->l_curr_cycle = be32_to_cpu(rhead->h_cycle);
1300 if (bump_cycle)
1301 log->l_curr_cycle++;
1302 atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn));
1303 atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn));
1304 xlog_assign_grant_head(&log->l_reserve_head.grant, log->l_curr_cycle,
1305 BBTOB(log->l_curr_block));
1306 xlog_assign_grant_head(&log->l_write_head.grant, log->l_curr_cycle,
1307 BBTOB(log->l_curr_block));
1308}
1309
1310/*
1219 * Find the sync block number or the tail of the log. 1311 * Find the sync block number or the tail of the log.
1220 * 1312 *
1221 * This will be the block number of the last record to have its 1313 * This will be the block number of the last record to have its
@@ -1238,22 +1330,20 @@ xlog_find_tail(
1238 xfs_daddr_t *tail_blk) 1330 xfs_daddr_t *tail_blk)
1239{ 1331{
1240 xlog_rec_header_t *rhead; 1332 xlog_rec_header_t *rhead;
1241 xlog_op_header_t *op_head;
1242 char *offset = NULL; 1333 char *offset = NULL;
1243 xfs_buf_t *bp; 1334 xfs_buf_t *bp;
1244 int error; 1335 int error;
1245 xfs_daddr_t umount_data_blk;
1246 xfs_daddr_t after_umount_blk;
1247 xfs_daddr_t rhead_blk; 1336 xfs_daddr_t rhead_blk;
1248 xfs_lsn_t tail_lsn; 1337 xfs_lsn_t tail_lsn;
1249 int hblks;
1250 bool wrapped = false; 1338 bool wrapped = false;
1339 bool clean = false;
1251 1340
1252 /* 1341 /*
1253 * Find previous log record 1342 * Find previous log record
1254 */ 1343 */
1255 if ((error = xlog_find_head(log, head_blk))) 1344 if ((error = xlog_find_head(log, head_blk)))
1256 return error; 1345 return error;
1346 ASSERT(*head_blk < INT_MAX);
1257 1347
1258 bp = xlog_get_bp(log, 1); 1348 bp = xlog_get_bp(log, 1);
1259 if (!bp) 1349 if (!bp)
@@ -1271,100 +1361,75 @@ xlog_find_tail(
1271 } 1361 }
1272 1362
1273 /* 1363 /*
1274 * Trim the head block back to skip over torn records. We can have 1364 * Search backwards through the log looking for the log record header
1275 * multiple log I/Os in flight at any time, so we assume CRC failures 1365 * block. This wraps all the way back around to the head so something is
1276 * back through the previous several records are torn writes and skip 1366 * seriously wrong if we can't find it.
1277 * them.
1278 */ 1367 */
1279 ASSERT(*head_blk < INT_MAX); 1368 error = xlog_rseek_logrec_hdr(log, *head_blk, *head_blk, 1, bp,
1280 error = xlog_verify_head(log, head_blk, tail_blk, bp, &rhead_blk, 1369 &rhead_blk, &rhead, &wrapped);
1281 &rhead, &wrapped); 1370 if (error < 0)
1282 if (error) 1371 return error;
1283 goto done; 1372 if (!error) {
1373 xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__);
1374 return -EIO;
1375 }
1376 *tail_blk = BLOCK_LSN(be64_to_cpu(rhead->h_tail_lsn));
1284 1377
1285 /* 1378 /*
1286 * Reset log values according to the state of the log when we 1379 * Set the log state based on the current head record.
1287 * crashed. In the case where head_blk == 0, we bump curr_cycle
1288 * one because the next write starts a new cycle rather than
1289 * continuing the cycle of the last good log record. At this
1290 * point we have guaranteed that all partial log records have been
1291 * accounted for. Therefore, we know that the last good log record
1292 * written was complete and ended exactly on the end boundary
1293 * of the physical log.
1294 */ 1380 */
1295 log->l_prev_block = rhead_blk; 1381 xlog_set_state(log, *head_blk, rhead, rhead_blk, wrapped);
1296 log->l_curr_block = (int)*head_blk; 1382 tail_lsn = atomic64_read(&log->l_tail_lsn);
1297 log->l_curr_cycle = be32_to_cpu(rhead->h_cycle);
1298 if (wrapped)
1299 log->l_curr_cycle++;
1300 atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn));
1301 atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn));
1302 xlog_assign_grant_head(&log->l_reserve_head.grant, log->l_curr_cycle,
1303 BBTOB(log->l_curr_block));
1304 xlog_assign_grant_head(&log->l_write_head.grant, log->l_curr_cycle,
1305 BBTOB(log->l_curr_block));
1306 1383
1307 /* 1384 /*
1308 * Look for unmount record. If we find it, then we know there 1385 * Look for an unmount record at the head of the log. This sets the log
1309 * was a clean unmount. Since 'i' could be the last block in 1386 * state to determine whether recovery is necessary.
1310 * the physical log, we convert to a log block before comparing 1387 */
1311 * to the head_blk. 1388 error = xlog_check_unmount_rec(log, head_blk, tail_blk, rhead,
1389 rhead_blk, bp, &clean);
1390 if (error)
1391 goto done;
1392
1393 /*
1394 * Verify the log head if the log is not clean (e.g., we have anything
1395 * but an unmount record at the head). This uses CRC verification to
1396 * detect torn writes; any CRC failures discovered are treated as torn
1397 * writes and the log head is trimmed accordingly.
1312 * 1398 *
1313 * Save the current tail lsn to use to pass to 1399 * Note that we can only run CRC verification when the log is dirty
1314 * xlog_clear_stale_blocks() below. We won't want to clear the 1400 * because there's no guarantee that the log data behind an unmount
1315 * unmount record if there is one, so we pass the lsn of the 1401 * record is compatible with the current architecture.
1316 * unmount record rather than the block after it.
1317 */ 1402 */
1318 if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { 1403 if (!clean) {
1319 int h_size = be32_to_cpu(rhead->h_size); 1404 xfs_daddr_t orig_head = *head_blk;
1320 int h_version = be32_to_cpu(rhead->h_version);
1321 1405
1322 if ((h_version & XLOG_VERSION_2) && 1406 error = xlog_verify_head(log, head_blk, tail_blk, bp,
1323 (h_size > XLOG_HEADER_CYCLE_SIZE)) { 1407 &rhead_blk, &rhead, &wrapped);
1324 hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
1325 if (h_size % XLOG_HEADER_CYCLE_SIZE)
1326 hblks++;
1327 } else {
1328 hblks = 1;
1329 }
1330 } else {
1331 hblks = 1;
1332 }
1333 after_umount_blk = rhead_blk + hblks + BTOBB(be32_to_cpu(rhead->h_len));
1334 after_umount_blk = do_mod(after_umount_blk, log->l_logBBsize);
1335 tail_lsn = atomic64_read(&log->l_tail_lsn);
1336 if (*head_blk == after_umount_blk &&
1337 be32_to_cpu(rhead->h_num_logops) == 1) {
1338 umount_data_blk = rhead_blk + hblks;
1339 umount_data_blk = do_mod(umount_data_blk, log->l_logBBsize);
1340 error = xlog_bread(log, umount_data_blk, 1, bp, &offset);
1341 if (error) 1408 if (error)
1342 goto done; 1409 goto done;
1343 1410
1344 op_head = (xlog_op_header_t *)offset; 1411 /* update in-core state again if the head changed */
1345 if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) { 1412 if (*head_blk != orig_head) {
1346 /* 1413 xlog_set_state(log, *head_blk, rhead, rhead_blk,
1347 * Set tail and last sync so that newly written 1414 wrapped);
1348 * log records will point recovery to after the 1415 tail_lsn = atomic64_read(&log->l_tail_lsn);
1349 * current unmount record. 1416 error = xlog_check_unmount_rec(log, head_blk, tail_blk,
1350 */ 1417 rhead, rhead_blk, bp,
1351 xlog_assign_atomic_lsn(&log->l_tail_lsn, 1418 &clean);
1352 log->l_curr_cycle, after_umount_blk); 1419 if (error)
1353 xlog_assign_atomic_lsn(&log->l_last_sync_lsn, 1420 goto done;
1354 log->l_curr_cycle, after_umount_blk);
1355 *tail_blk = after_umount_blk;
1356
1357 /*
1358 * Note that the unmount was clean. If the unmount
1359 * was not clean, we need to know this to rebuild the
1360 * superblock counters from the perag headers if we
1361 * have a filesystem using non-persistent counters.
1362 */
1363 log->l_mp->m_flags |= XFS_MOUNT_WAS_CLEAN;
1364 } 1421 }
1365 } 1422 }
1366 1423
1367 /* 1424 /*
1425 * Note that the unmount was clean. If the unmount was not clean, we
1426 * need to know this to rebuild the superblock counters from the perag
1427 * headers if we have a filesystem using non-persistent counters.
1428 */
1429 if (clean)
1430 log->l_mp->m_flags |= XFS_MOUNT_WAS_CLEAN;
1431
1432 /*
1368 * Make sure that there are no blocks in front of the head 1433 * Make sure that there are no blocks in front of the head
1369 * with the same cycle number as the head. This can happen 1434 * with the same cycle number as the head. This can happen
1370 * because we allow multiple outstanding log writes concurrently, 1435 * because we allow multiple outstanding log writes concurrently,
@@ -2473,6 +2538,13 @@ xlog_recover_validate_buf_type(
2473 } 2538 }
2474 bp->b_ops = &xfs_sb_buf_ops; 2539 bp->b_ops = &xfs_sb_buf_ops;
2475 break; 2540 break;
2541#ifdef CONFIG_XFS_RT
2542 case XFS_BLFT_RTBITMAP_BUF:
2543 case XFS_BLFT_RTSUMMARY_BUF:
2544 /* no magic numbers for verification of RT buffers */
2545 bp->b_ops = &xfs_rtbuf_ops;
2546 break;
2547#endif /* CONFIG_XFS_RT */
2476 default: 2548 default:
2477 xfs_warn(mp, "Unknown buffer type %d!", 2549 xfs_warn(mp, "Unknown buffer type %d!",
2478 xfs_blft_from_flags(buf_f)); 2550 xfs_blft_from_flags(buf_f));
@@ -4491,7 +4563,7 @@ xlog_recover_process(
4491 * know precisely what failed. 4563 * know precisely what failed.
4492 */ 4564 */
4493 if (pass == XLOG_RECOVER_CRCPASS) { 4565 if (pass == XLOG_RECOVER_CRCPASS) {
4494 if (rhead->h_crc && crc != le32_to_cpu(rhead->h_crc)) 4566 if (rhead->h_crc && crc != rhead->h_crc)
4495 return -EFSBADCRC; 4567 return -EFSBADCRC;
4496 return 0; 4568 return 0;
4497 } 4569 }
@@ -4502,7 +4574,7 @@ xlog_recover_process(
4502 * zero CRC check prevents warnings from being emitted when upgrading 4574 * zero CRC check prevents warnings from being emitted when upgrading
4503 * the kernel from one that does not add CRCs by default. 4575 * the kernel from one that does not add CRCs by default.
4504 */ 4576 */
4505 if (crc != le32_to_cpu(rhead->h_crc)) { 4577 if (crc != rhead->h_crc) {
4506 if (rhead->h_crc || xfs_sb_version_hascrc(&log->l_mp->m_sb)) { 4578 if (rhead->h_crc || xfs_sb_version_hascrc(&log->l_mp->m_sb)) {
4507 xfs_alert(log->l_mp, 4579 xfs_alert(log->l_mp,
4508 "log record CRC mismatch: found 0x%x, expected 0x%x.", 4580 "log record CRC mismatch: found 0x%x, expected 0x%x.",
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index b57098481c10..a4e03ab50342 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -327,7 +327,6 @@ extern int xfs_mod_fdblocks(struct xfs_mount *mp, int64_t delta,
327 bool reserved); 327 bool reserved);
328extern int xfs_mod_frextents(struct xfs_mount *mp, int64_t delta); 328extern int xfs_mod_frextents(struct xfs_mount *mp, int64_t delta);
329 329
330extern int xfs_mount_log_sb(xfs_mount_t *);
331extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int); 330extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int);
332extern int xfs_readsb(xfs_mount_t *, int); 331extern int xfs_readsb(xfs_mount_t *, int);
333extern void xfs_freesb(xfs_mount_t *); 332extern void xfs_freesb(xfs_mount_t *);
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 532ab79d38fe..be125e1758c1 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -560,6 +560,37 @@ xfs_qm_shrink_count(
560 return list_lru_shrink_count(&qi->qi_lru, sc); 560 return list_lru_shrink_count(&qi->qi_lru, sc);
561} 561}
562 562
563STATIC void
564xfs_qm_set_defquota(
565 xfs_mount_t *mp,
566 uint type,
567 xfs_quotainfo_t *qinf)
568{
569 xfs_dquot_t *dqp;
570 struct xfs_def_quota *defq;
571 int error;
572
573 error = xfs_qm_dqread(mp, 0, type, XFS_QMOPT_DOWARN, &dqp);
574
575 if (!error) {
576 xfs_disk_dquot_t *ddqp = &dqp->q_core;
577
578 defq = xfs_get_defquota(dqp, qinf);
579
580 /*
581 * Timers and warnings have already been set; just set the
582 * default limits for this quota type.
583 */
584 defq->bhardlimit = be64_to_cpu(ddqp->d_blk_hardlimit);
585 defq->bsoftlimit = be64_to_cpu(ddqp->d_blk_softlimit);
586 defq->ihardlimit = be64_to_cpu(ddqp->d_ino_hardlimit);
587 defq->isoftlimit = be64_to_cpu(ddqp->d_ino_softlimit);
588 defq->rtbhardlimit = be64_to_cpu(ddqp->d_rtb_hardlimit);
589 defq->rtbsoftlimit = be64_to_cpu(ddqp->d_rtb_softlimit);
590 xfs_qm_dqdestroy(dqp);
591 }
592}
593
563/* 594/*
564 * This initializes all the quota information that's kept in the 595 * This initializes all the quota information that's kept in the
565 * mount structure 596 * mount structure
@@ -606,19 +637,19 @@ xfs_qm_init_quotainfo(
606 * We try to get the limits from the superuser's limits fields. 637 * We try to get the limits from the superuser's limits fields.
607 * This is quite hacky, but it is standard quota practice. 638 * This is quite hacky, but it is standard quota practice.
608 * 639 *
609 * We look at the USR dquot with id == 0 first, but if user quotas
610 * are not enabled we goto the GRP dquot with id == 0.
611 * We don't really care to keep separate default limits for user
612 * and group quotas, at least not at this point.
613 *
614 * Since we may not have done a quotacheck by this point, just read 640 * Since we may not have done a quotacheck by this point, just read
615 * the dquot without attaching it to any hashtables or lists. 641 * the dquot without attaching it to any hashtables or lists.
642 *
643 * Timers and warnings are globally set by the first timer found in
644 * user/group/proj quota types, otherwise a default value is used.
645 * This should be split into different fields per quota type.
616 */ 646 */
617 error = xfs_qm_dqread(mp, 0, 647 error = xfs_qm_dqread(mp, 0,
618 XFS_IS_UQUOTA_RUNNING(mp) ? XFS_DQ_USER : 648 XFS_IS_UQUOTA_RUNNING(mp) ? XFS_DQ_USER :
619 (XFS_IS_GQUOTA_RUNNING(mp) ? XFS_DQ_GROUP : 649 (XFS_IS_GQUOTA_RUNNING(mp) ? XFS_DQ_GROUP :
620 XFS_DQ_PROJ), 650 XFS_DQ_PROJ),
621 XFS_QMOPT_DOWARN, &dqp); 651 XFS_QMOPT_DOWARN, &dqp);
652
622 if (!error) { 653 if (!error) {
623 xfs_disk_dquot_t *ddqp = &dqp->q_core; 654 xfs_disk_dquot_t *ddqp = &dqp->q_core;
624 655
@@ -639,13 +670,6 @@ xfs_qm_init_quotainfo(
639 be16_to_cpu(ddqp->d_iwarns) : XFS_QM_IWARNLIMIT; 670 be16_to_cpu(ddqp->d_iwarns) : XFS_QM_IWARNLIMIT;
640 qinf->qi_rtbwarnlimit = ddqp->d_rtbwarns ? 671 qinf->qi_rtbwarnlimit = ddqp->d_rtbwarns ?
641 be16_to_cpu(ddqp->d_rtbwarns) : XFS_QM_RTBWARNLIMIT; 672 be16_to_cpu(ddqp->d_rtbwarns) : XFS_QM_RTBWARNLIMIT;
642 qinf->qi_bhardlimit = be64_to_cpu(ddqp->d_blk_hardlimit);
643 qinf->qi_bsoftlimit = be64_to_cpu(ddqp->d_blk_softlimit);
644 qinf->qi_ihardlimit = be64_to_cpu(ddqp->d_ino_hardlimit);
645 qinf->qi_isoftlimit = be64_to_cpu(ddqp->d_ino_softlimit);
646 qinf->qi_rtbhardlimit = be64_to_cpu(ddqp->d_rtb_hardlimit);
647 qinf->qi_rtbsoftlimit = be64_to_cpu(ddqp->d_rtb_softlimit);
648
649 xfs_qm_dqdestroy(dqp); 673 xfs_qm_dqdestroy(dqp);
650 } else { 674 } else {
651 qinf->qi_btimelimit = XFS_QM_BTIMELIMIT; 675 qinf->qi_btimelimit = XFS_QM_BTIMELIMIT;
@@ -656,6 +680,13 @@ xfs_qm_init_quotainfo(
656 qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT; 680 qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT;
657 } 681 }
658 682
683 if (XFS_IS_UQUOTA_RUNNING(mp))
684 xfs_qm_set_defquota(mp, XFS_DQ_USER, qinf);
685 if (XFS_IS_GQUOTA_RUNNING(mp))
686 xfs_qm_set_defquota(mp, XFS_DQ_GROUP, qinf);
687 if (XFS_IS_PQUOTA_RUNNING(mp))
688 xfs_qm_set_defquota(mp, XFS_DQ_PROJ, qinf);
689
659 qinf->qi_shrinker.count_objects = xfs_qm_shrink_count; 690 qinf->qi_shrinker.count_objects = xfs_qm_shrink_count;
660 qinf->qi_shrinker.scan_objects = xfs_qm_shrink_scan; 691 qinf->qi_shrinker.scan_objects = xfs_qm_shrink_scan;
661 qinf->qi_shrinker.seeks = DEFAULT_SEEKS; 692 qinf->qi_shrinker.seeks = DEFAULT_SEEKS;
diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h
index 996a04064894..2975a822e9f0 100644
--- a/fs/xfs/xfs_qm.h
+++ b/fs/xfs/xfs_qm.h
@@ -53,6 +53,15 @@ extern struct kmem_zone *xfs_qm_dqtrxzone;
53 */ 53 */
54#define XFS_DQUOT_CLUSTER_SIZE_FSB (xfs_filblks_t)1 54#define XFS_DQUOT_CLUSTER_SIZE_FSB (xfs_filblks_t)1
55 55
56struct xfs_def_quota {
57 xfs_qcnt_t bhardlimit; /* default data blk hard limit */
58 xfs_qcnt_t bsoftlimit; /* default data blk soft limit */
59 xfs_qcnt_t ihardlimit; /* default inode count hard limit */
60 xfs_qcnt_t isoftlimit; /* default inode count soft limit */
61 xfs_qcnt_t rtbhardlimit; /* default realtime blk hard limit */
62 xfs_qcnt_t rtbsoftlimit; /* default realtime blk soft limit */
63};
64
56/* 65/*
57 * Various quota information for individual filesystems. 66 * Various quota information for individual filesystems.
58 * The mount structure keeps a pointer to this. 67 * The mount structure keeps a pointer to this.
@@ -76,12 +85,9 @@ typedef struct xfs_quotainfo {
76 struct mutex qi_quotaofflock;/* to serialize quotaoff */ 85 struct mutex qi_quotaofflock;/* to serialize quotaoff */
77 xfs_filblks_t qi_dqchunklen; /* # BBs in a chunk of dqs */ 86 xfs_filblks_t qi_dqchunklen; /* # BBs in a chunk of dqs */
78 uint qi_dqperchunk; /* # ondisk dqs in above chunk */ 87 uint qi_dqperchunk; /* # ondisk dqs in above chunk */
79 xfs_qcnt_t qi_bhardlimit; /* default data blk hard limit */ 88 struct xfs_def_quota qi_usr_default;
80 xfs_qcnt_t qi_bsoftlimit; /* default data blk soft limit */ 89 struct xfs_def_quota qi_grp_default;
81 xfs_qcnt_t qi_ihardlimit; /* default inode count hard limit */ 90 struct xfs_def_quota qi_prj_default;
82 xfs_qcnt_t qi_isoftlimit; /* default inode count soft limit */
83 xfs_qcnt_t qi_rtbhardlimit;/* default realtime blk hard limit */
84 xfs_qcnt_t qi_rtbsoftlimit;/* default realtime blk soft limit */
85 struct shrinker qi_shrinker; 91 struct shrinker qi_shrinker;
86} xfs_quotainfo_t; 92} xfs_quotainfo_t;
87 93
@@ -104,15 +110,15 @@ xfs_dquot_tree(
104} 110}
105 111
106static inline struct xfs_inode * 112static inline struct xfs_inode *
107xfs_dq_to_quota_inode(struct xfs_dquot *dqp) 113xfs_quota_inode(xfs_mount_t *mp, uint dq_flags)
108{ 114{
109 switch (dqp->dq_flags & XFS_DQ_ALLTYPES) { 115 switch (dq_flags & XFS_DQ_ALLTYPES) {
110 case XFS_DQ_USER: 116 case XFS_DQ_USER:
111 return dqp->q_mount->m_quotainfo->qi_uquotaip; 117 return mp->m_quotainfo->qi_uquotaip;
112 case XFS_DQ_GROUP: 118 case XFS_DQ_GROUP:
113 return dqp->q_mount->m_quotainfo->qi_gquotaip; 119 return mp->m_quotainfo->qi_gquotaip;
114 case XFS_DQ_PROJ: 120 case XFS_DQ_PROJ:
115 return dqp->q_mount->m_quotainfo->qi_pquotaip; 121 return mp->m_quotainfo->qi_pquotaip;
116 default: 122 default:
117 ASSERT(0); 123 ASSERT(0);
118 } 124 }
@@ -164,11 +170,27 @@ extern void xfs_qm_dqrele_all_inodes(struct xfs_mount *, uint);
164 170
165/* quota ops */ 171/* quota ops */
166extern int xfs_qm_scall_trunc_qfiles(struct xfs_mount *, uint); 172extern int xfs_qm_scall_trunc_qfiles(struct xfs_mount *, uint);
167extern int xfs_qm_scall_getquota(struct xfs_mount *, xfs_dqid_t, 173extern int xfs_qm_scall_getquota(struct xfs_mount *, xfs_dqid_t *,
168 uint, struct qc_dqblk *); 174 uint, struct qc_dqblk *, uint);
169extern int xfs_qm_scall_setqlim(struct xfs_mount *, xfs_dqid_t, uint, 175extern int xfs_qm_scall_setqlim(struct xfs_mount *, xfs_dqid_t, uint,
170 struct qc_dqblk *); 176 struct qc_dqblk *);
171extern int xfs_qm_scall_quotaon(struct xfs_mount *, uint); 177extern int xfs_qm_scall_quotaon(struct xfs_mount *, uint);
172extern int xfs_qm_scall_quotaoff(struct xfs_mount *, uint); 178extern int xfs_qm_scall_quotaoff(struct xfs_mount *, uint);
173 179
180static inline struct xfs_def_quota *
181xfs_get_defquota(struct xfs_dquot *dqp, struct xfs_quotainfo *qi)
182{
183 struct xfs_def_quota *defq;
184
185 if (XFS_QM_ISUDQ(dqp))
186 defq = &qi->qi_usr_default;
187 else if (XFS_QM_ISGDQ(dqp))
188 defq = &qi->qi_grp_default;
189 else {
190 ASSERT(XFS_QM_ISPDQ(dqp));
191 defq = &qi->qi_prj_default;
192 }
193 return defq;
194}
195
174#endif /* __XFS_QM_H__ */ 196#endif /* __XFS_QM_H__ */
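
With the defaults split out per quota type, code that used to read qi_bhardlimit and friends now fetches the right set via xfs_get_defquota() with the dquot in hand. A usage sketch mirroring the enforcement path later in this patch (sketch_effective_bhardlimit is a hypothetical helper):

static xfs_qcnt_t
sketch_effective_bhardlimit(
	struct xfs_dquot	*dqp)
{
	struct xfs_def_quota	*defq;
	xfs_qcnt_t		limit;

	defq = xfs_get_defquota(dqp, dqp->q_mount->m_quotainfo);

	/* fall back to the per-type default if the dquot has no limit */
	limit = be64_to_cpu(dqp->q_core.d_blk_hardlimit);
	return limit ? limit : defq->bhardlimit;
}
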
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index 3640c6e896af..f4d0e0a8f517 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -404,6 +404,7 @@ xfs_qm_scall_setqlim(
404 struct xfs_disk_dquot *ddq; 404 struct xfs_disk_dquot *ddq;
405 struct xfs_dquot *dqp; 405 struct xfs_dquot *dqp;
406 struct xfs_trans *tp; 406 struct xfs_trans *tp;
407 struct xfs_def_quota *defq;
407 int error; 408 int error;
408 xfs_qcnt_t hard, soft; 409 xfs_qcnt_t hard, soft;
409 410
@@ -431,6 +432,8 @@ xfs_qm_scall_setqlim(
431 ASSERT(error != -ENOENT); 432 ASSERT(error != -ENOENT);
432 goto out_unlock; 433 goto out_unlock;
433 } 434 }
435
436 defq = xfs_get_defquota(dqp, q);
434 xfs_dqunlock(dqp); 437 xfs_dqunlock(dqp);
435 438
436 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM); 439 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM);
@@ -458,8 +461,8 @@ xfs_qm_scall_setqlim(
458 ddq->d_blk_softlimit = cpu_to_be64(soft); 461 ddq->d_blk_softlimit = cpu_to_be64(soft);
459 xfs_dquot_set_prealloc_limits(dqp); 462 xfs_dquot_set_prealloc_limits(dqp);
460 if (id == 0) { 463 if (id == 0) {
461 q->qi_bhardlimit = hard; 464 defq->bhardlimit = hard;
462 q->qi_bsoftlimit = soft; 465 defq->bsoftlimit = soft;
463 } 466 }
464 } else { 467 } else {
465 xfs_debug(mp, "blkhard %Ld < blksoft %Ld", hard, soft); 468 xfs_debug(mp, "blkhard %Ld < blksoft %Ld", hard, soft);
@@ -474,8 +477,8 @@ xfs_qm_scall_setqlim(
474 ddq->d_rtb_hardlimit = cpu_to_be64(hard); 477 ddq->d_rtb_hardlimit = cpu_to_be64(hard);
475 ddq->d_rtb_softlimit = cpu_to_be64(soft); 478 ddq->d_rtb_softlimit = cpu_to_be64(soft);
476 if (id == 0) { 479 if (id == 0) {
477 q->qi_rtbhardlimit = hard; 480 defq->rtbhardlimit = hard;
478 q->qi_rtbsoftlimit = soft; 481 defq->rtbsoftlimit = soft;
479 } 482 }
480 } else { 483 } else {
481 xfs_debug(mp, "rtbhard %Ld < rtbsoft %Ld", hard, soft); 484 xfs_debug(mp, "rtbhard %Ld < rtbsoft %Ld", hard, soft);
@@ -491,8 +494,8 @@ xfs_qm_scall_setqlim(
491 ddq->d_ino_hardlimit = cpu_to_be64(hard); 494 ddq->d_ino_hardlimit = cpu_to_be64(hard);
492 ddq->d_ino_softlimit = cpu_to_be64(soft); 495 ddq->d_ino_softlimit = cpu_to_be64(soft);
493 if (id == 0) { 496 if (id == 0) {
494 q->qi_ihardlimit = hard; 497 defq->ihardlimit = hard;
495 q->qi_isoftlimit = soft; 498 defq->isoftlimit = soft;
496 } 499 }
497 } else { 500 } else {
498 xfs_debug(mp, "ihard %Ld < isoft %Ld", hard, soft); 501 xfs_debug(mp, "ihard %Ld < isoft %Ld", hard, soft);
@@ -635,9 +638,10 @@ out:
635int 638int
636xfs_qm_scall_getquota( 639xfs_qm_scall_getquota(
637 struct xfs_mount *mp, 640 struct xfs_mount *mp,
638 xfs_dqid_t id, 641 xfs_dqid_t *id,
639 uint type, 642 uint type,
640 struct qc_dqblk *dst) 643 struct qc_dqblk *dst,
644 uint dqget_flags)
641{ 645{
642 struct xfs_dquot *dqp; 646 struct xfs_dquot *dqp;
643 int error; 647 int error;
@@ -647,7 +651,7 @@ xfs_qm_scall_getquota(
647 * we aren't passing the XFS_QMOPT_DOALLOC flag. If it doesn't 651 * we aren't passing the XFS_QMOPT_DOALLOC flag. If it doesn't
648 * exist, we'll get ENOENT back. 652 * exist, we'll get ENOENT back.
649 */ 653 */
650 error = xfs_qm_dqget(mp, NULL, id, type, 0, &dqp); 654 error = xfs_qm_dqget(mp, NULL, *id, type, dqget_flags, &dqp);
651 if (error) 655 if (error)
652 return error; 656 return error;
653 657
@@ -660,6 +664,9 @@ xfs_qm_scall_getquota(
660 goto out_put; 664 goto out_put;
661 } 665 }
662 666
667 /* Fill in the ID we actually read from disk */
668 *id = be32_to_cpu(dqp->q_core.d_id);
669
663 memset(dst, 0, sizeof(*dst)); 670 memset(dst, 0, sizeof(*dst));
664 dst->d_spc_hardlimit = 671 dst->d_spc_hardlimit =
665 XFS_FSB_TO_B(mp, be64_to_cpu(dqp->q_core.d_blk_hardlimit)); 672 XFS_FSB_TO_B(mp, be64_to_cpu(dqp->q_core.d_blk_hardlimit));
@@ -701,7 +708,7 @@ xfs_qm_scall_getquota(
701 if (((XFS_IS_UQUOTA_ENFORCED(mp) && type == XFS_DQ_USER) || 708 if (((XFS_IS_UQUOTA_ENFORCED(mp) && type == XFS_DQ_USER) ||
702 (XFS_IS_GQUOTA_ENFORCED(mp) && type == XFS_DQ_GROUP) || 709 (XFS_IS_GQUOTA_ENFORCED(mp) && type == XFS_DQ_GROUP) ||
703 (XFS_IS_PQUOTA_ENFORCED(mp) && type == XFS_DQ_PROJ)) && 710 (XFS_IS_PQUOTA_ENFORCED(mp) && type == XFS_DQ_PROJ)) &&
704 id != 0) { 711 *id != 0) {
705 if ((dst->d_space > dst->d_spc_softlimit) && 712 if ((dst->d_space > dst->d_spc_softlimit) &&
706 (dst->d_spc_softlimit > 0)) { 713 (dst->d_spc_softlimit > 0)) {
707 ASSERT(dst->d_spc_timer != 0); 714 ASSERT(dst->d_spc_timer != 0);
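
Passing the ID by reference plus the XFS_QMOPT_DQNEXT flag turns xfs_qm_scall_getquota() into an enumerator: it returns the next active dquot at or above the given ID and rewrites the ID to what it actually found. A hypothetical iteration sketch (the wrap guard is an assumption, not part of this patch):

static void
sketch_walk_user_quotas(
	struct xfs_mount	*mp)
{
	struct qc_dqblk		qdq;
	xfs_dqid_t		id = 0;

	while (!xfs_qm_scall_getquota(mp, &id, XFS_DQ_USER, &qdq,
				      XFS_QMOPT_DQNEXT)) {
		/* qdq and id now describe the next allocated user dquot */
		if (id == (xfs_dqid_t)-1)
			break;		/* hypothetical wrap guard */
		id++;			/* continue past the ID we just got */
	}
	/* the loop ends once no dquot with a higher ID exists */
}
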
diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c
index 7795e0d01382..f82d79a8c694 100644
--- a/fs/xfs/xfs_quotaops.c
+++ b/fs/xfs/xfs_quotaops.c
@@ -231,14 +231,45 @@ xfs_fs_get_dqblk(
231 struct qc_dqblk *qdq) 231 struct qc_dqblk *qdq)
232{ 232{
233 struct xfs_mount *mp = XFS_M(sb); 233 struct xfs_mount *mp = XFS_M(sb);
234 xfs_dqid_t id;
234 235
235 if (!XFS_IS_QUOTA_RUNNING(mp)) 236 if (!XFS_IS_QUOTA_RUNNING(mp))
236 return -ENOSYS; 237 return -ENOSYS;
237 if (!XFS_IS_QUOTA_ON(mp)) 238 if (!XFS_IS_QUOTA_ON(mp))
238 return -ESRCH; 239 return -ESRCH;
239 240
240 return xfs_qm_scall_getquota(mp, from_kqid(&init_user_ns, qid), 241 id = from_kqid(&init_user_ns, qid);
241 xfs_quota_type(qid.type), qdq); 242 return xfs_qm_scall_getquota(mp, &id,
243 xfs_quota_type(qid.type), qdq, 0);
244}
245
246/* Return quota info for active quota >= this qid */
247STATIC int
248xfs_fs_get_nextdqblk(
249 struct super_block *sb,
250 struct kqid *qid,
251 struct qc_dqblk *qdq)
252{
253 int ret;
254 struct xfs_mount *mp = XFS_M(sb);
255 xfs_dqid_t id;
256
257 if (!XFS_IS_QUOTA_RUNNING(mp))
258 return -ENOSYS;
259 if (!XFS_IS_QUOTA_ON(mp))
260 return -ESRCH;
261
262 id = from_kqid(&init_user_ns, *qid);
263 ret = xfs_qm_scall_getquota(mp, &id,
264 xfs_quota_type(qid->type), qdq,
265 XFS_QMOPT_DQNEXT);
266 if (ret)
267 return ret;
268
269 /* ID may be different, so convert back the ID we actually got */
270 *qid = make_kqid(current_user_ns(), qid->type, id);
271 return 0;
272
242} 273}
243 274
244STATIC int 275STATIC int
@@ -267,5 +298,6 @@ const struct quotactl_ops xfs_quotactl_operations = {
267 .quota_disable = xfs_quota_disable, 298 .quota_disable = xfs_quota_disable,
268 .rm_xquota = xfs_fs_rm_xquota, 299 .rm_xquota = xfs_fs_rm_xquota,
269 .get_dqblk = xfs_fs_get_dqblk, 300 .get_dqblk = xfs_fs_get_dqblk,
301 .get_nextdqblk = xfs_fs_get_nextdqblk,
270 .set_dqblk = xfs_fs_set_dqblk, 302 .set_dqblk = xfs_fs_set_dqblk,
271}; 303};
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 391d797cb53f..c8d58426008e 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -1296,11 +1296,7 @@ DEFINE_IOMAP_EVENT(xfs_map_blocks_found);
1296DEFINE_IOMAP_EVENT(xfs_map_blocks_alloc); 1296DEFINE_IOMAP_EVENT(xfs_map_blocks_alloc);
1297DEFINE_IOMAP_EVENT(xfs_get_blocks_found); 1297DEFINE_IOMAP_EVENT(xfs_get_blocks_found);
1298DEFINE_IOMAP_EVENT(xfs_get_blocks_alloc); 1298DEFINE_IOMAP_EVENT(xfs_get_blocks_alloc);
1299DEFINE_IOMAP_EVENT(xfs_gbmap_direct); 1299DEFINE_IOMAP_EVENT(xfs_get_blocks_map_direct);
1300DEFINE_IOMAP_EVENT(xfs_gbmap_direct_new);
1301DEFINE_IOMAP_EVENT(xfs_gbmap_direct_update);
1302DEFINE_IOMAP_EVENT(xfs_gbmap_direct_none);
1303DEFINE_IOMAP_EVENT(xfs_gbmap_direct_endio);
1304 1300
1305DECLARE_EVENT_CLASS(xfs_simple_io_class, 1301DECLARE_EVENT_CLASS(xfs_simple_io_class,
1306 TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count), 1302 TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count),
@@ -1340,6 +1336,9 @@ DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert);
1340DEFINE_SIMPLE_IO_EVENT(xfs_get_blocks_notfound); 1336DEFINE_SIMPLE_IO_EVENT(xfs_get_blocks_notfound);
1341DEFINE_SIMPLE_IO_EVENT(xfs_setfilesize); 1337DEFINE_SIMPLE_IO_EVENT(xfs_setfilesize);
1342DEFINE_SIMPLE_IO_EVENT(xfs_zero_eof); 1338DEFINE_SIMPLE_IO_EVENT(xfs_zero_eof);
1339DEFINE_SIMPLE_IO_EVENT(xfs_end_io_direct_write);
1340DEFINE_SIMPLE_IO_EVENT(xfs_end_io_direct_write_unwritten);
1341DEFINE_SIMPLE_IO_EVENT(xfs_end_io_direct_write_append);
1343 1342
1344DECLARE_EVENT_CLASS(xfs_itrunc_class, 1343DECLARE_EVENT_CLASS(xfs_itrunc_class,
1345 TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), 1344 TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size),
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 4f18fd92ca13..d6c9c3e9e02b 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -497,6 +497,7 @@ xfsaild(
497 long tout = 0; /* milliseconds */ 497 long tout = 0; /* milliseconds */
498 498
499 current->flags |= PF_MEMALLOC; 499 current->flags |= PF_MEMALLOC;
500 set_freezable();
500 501
501 while (!kthread_should_stop()) { 502 while (!kthread_should_stop()) {
502 if (tout && tout <= 20) 503 if (tout && tout <= 20)
@@ -519,14 +520,14 @@ xfsaild(
519 if (!xfs_ail_min(ailp) && 520 if (!xfs_ail_min(ailp) &&
520 ailp->xa_target == ailp->xa_target_prev) { 521 ailp->xa_target == ailp->xa_target_prev) {
521 spin_unlock(&ailp->xa_lock); 522 spin_unlock(&ailp->xa_lock);
522 schedule(); 523 freezable_schedule();
523 tout = 0; 524 tout = 0;
524 continue; 525 continue;
525 } 526 }
526 spin_unlock(&ailp->xa_lock); 527 spin_unlock(&ailp->xa_lock);
527 528
528 if (tout) 529 if (tout)
529 schedule_timeout(msecs_to_jiffies(tout)); 530 freezable_schedule_timeout(msecs_to_jiffies(tout));
530 531
531 __set_current_state(TASK_RUNNING); 532 __set_current_state(TASK_RUNNING);
532 533
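
The xfsaild() change follows the standard freezable-kthread pattern: opt in once with set_freezable(), then use the freezer-aware schedule variants at every sleep site so the freezer can park the thread during suspend. A generic sketch of the pattern, assuming only the stock freezer and kthread APIs:

#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/sched.h>

static int
sketch_freezable_worker(
	void		*data)
{
	long		tout = 0;	/* ms; 0 means sleep until woken */

	set_freezable();		/* opt in to the freezer */

	while (!kthread_should_stop()) {
		set_current_state(TASK_INTERRUPTIBLE);

		if (!tout)
			freezable_schedule();
		else
			freezable_schedule_timeout(msecs_to_jiffies(tout));

		__set_current_state(TASK_RUNNING);
		/* ... do one pass of work, recompute tout ... */
	}
	return 0;
}
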
diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c
index 995170194df0..c3d547211d16 100644
--- a/fs/xfs/xfs_trans_dquot.c
+++ b/fs/xfs/xfs_trans_dquot.c
@@ -609,17 +609,20 @@ xfs_trans_dqresv(
609 xfs_qcnt_t total_count; 609 xfs_qcnt_t total_count;
610 xfs_qcnt_t *resbcountp; 610 xfs_qcnt_t *resbcountp;
611 xfs_quotainfo_t *q = mp->m_quotainfo; 611 xfs_quotainfo_t *q = mp->m_quotainfo;
612 struct xfs_def_quota *defq;
612 613
613 614
614 xfs_dqlock(dqp); 615 xfs_dqlock(dqp);
615 616
617 defq = xfs_get_defquota(dqp, q);
618
616 if (flags & XFS_TRANS_DQ_RES_BLKS) { 619 if (flags & XFS_TRANS_DQ_RES_BLKS) {
617 hardlimit = be64_to_cpu(dqp->q_core.d_blk_hardlimit); 620 hardlimit = be64_to_cpu(dqp->q_core.d_blk_hardlimit);
618 if (!hardlimit) 621 if (!hardlimit)
619 hardlimit = q->qi_bhardlimit; 622 hardlimit = defq->bhardlimit;
620 softlimit = be64_to_cpu(dqp->q_core.d_blk_softlimit); 623 softlimit = be64_to_cpu(dqp->q_core.d_blk_softlimit);
621 if (!softlimit) 624 if (!softlimit)
622 softlimit = q->qi_bsoftlimit; 625 softlimit = defq->bsoftlimit;
623 timer = be32_to_cpu(dqp->q_core.d_btimer); 626 timer = be32_to_cpu(dqp->q_core.d_btimer);
624 warns = be16_to_cpu(dqp->q_core.d_bwarns); 627 warns = be16_to_cpu(dqp->q_core.d_bwarns);
625 warnlimit = dqp->q_mount->m_quotainfo->qi_bwarnlimit; 628 warnlimit = dqp->q_mount->m_quotainfo->qi_bwarnlimit;
@@ -628,10 +631,10 @@ xfs_trans_dqresv(
628 ASSERT(flags & XFS_TRANS_DQ_RES_RTBLKS); 631 ASSERT(flags & XFS_TRANS_DQ_RES_RTBLKS);
629 hardlimit = be64_to_cpu(dqp->q_core.d_rtb_hardlimit); 632 hardlimit = be64_to_cpu(dqp->q_core.d_rtb_hardlimit);
630 if (!hardlimit) 633 if (!hardlimit)
631 hardlimit = q->qi_rtbhardlimit; 634 hardlimit = defq->rtbhardlimit;
632 softlimit = be64_to_cpu(dqp->q_core.d_rtb_softlimit); 635 softlimit = be64_to_cpu(dqp->q_core.d_rtb_softlimit);
633 if (!softlimit) 636 if (!softlimit)
634 softlimit = q->qi_rtbsoftlimit; 637 softlimit = defq->rtbsoftlimit;
635 timer = be32_to_cpu(dqp->q_core.d_rtbtimer); 638 timer = be32_to_cpu(dqp->q_core.d_rtbtimer);
636 warns = be16_to_cpu(dqp->q_core.d_rtbwarns); 639 warns = be16_to_cpu(dqp->q_core.d_rtbwarns);
637 warnlimit = dqp->q_mount->m_quotainfo->qi_rtbwarnlimit; 640 warnlimit = dqp->q_mount->m_quotainfo->qi_rtbwarnlimit;
@@ -672,10 +675,10 @@ xfs_trans_dqresv(
672 warnlimit = dqp->q_mount->m_quotainfo->qi_iwarnlimit; 675 warnlimit = dqp->q_mount->m_quotainfo->qi_iwarnlimit;
673 hardlimit = be64_to_cpu(dqp->q_core.d_ino_hardlimit); 676 hardlimit = be64_to_cpu(dqp->q_core.d_ino_hardlimit);
674 if (!hardlimit) 677 if (!hardlimit)
675 hardlimit = q->qi_ihardlimit; 678 hardlimit = defq->ihardlimit;
676 softlimit = be64_to_cpu(dqp->q_core.d_ino_softlimit); 679 softlimit = be64_to_cpu(dqp->q_core.d_ino_softlimit);
677 if (!softlimit) 680 if (!softlimit)
678 softlimit = q->qi_isoftlimit; 681 softlimit = defq->isoftlimit;
679 682
680 if (hardlimit && total_count > hardlimit) { 683 if (hardlimit && total_count > hardlimit) {
681 xfs_quota_warn(mp, dqp, QUOTA_NL_IHARDWARN); 684 xfs_quota_warn(mp, dqp, QUOTA_NL_IHARDWARN);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 1a2046275cdf..d7f37bfcbdce 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -70,7 +70,7 @@ extern int sysctl_protected_hardlinks;
70struct buffer_head; 70struct buffer_head;
71typedef int (get_block_t)(struct inode *inode, sector_t iblock, 71typedef int (get_block_t)(struct inode *inode, sector_t iblock,
72 struct buffer_head *bh_result, int create); 72 struct buffer_head *bh_result, int create);
73typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset, 73typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
74 ssize_t bytes, void *private); 74 ssize_t bytes, void *private);
75typedef void (dax_iodone_t)(struct buffer_head *bh_map, int uptodate); 75typedef void (dax_iodone_t)(struct buffer_head *bh_map, int uptodate);
76 76
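
With dio_iodone_t returning int, a filesystem completion handler can report a failure (say, of unwritten-extent conversion) and the direct I/O core folds it into the final return value. A hypothetical handler under the new signature (myfs_end_io and myfs_convert_unwritten are assumed names, not a real API):

static int
myfs_end_io(struct kiocb *iocb, loff_t offset, ssize_t size, void *private)
{
	struct inode	*inode = file_inode(iocb->ki_filp);

	if (size <= 0)
		return 0;	/* nothing was transferred, nothing to do */

	/* propagate conversion failures back to the dio core */
	return myfs_convert_unwritten(inode, offset, size);
}
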
diff --git a/include/linux/quota.h b/include/linux/quota.h
index b2505acfd3c0..fba92f5c1a63 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -425,6 +425,8 @@ struct quotactl_ops {
425 int (*quota_sync)(struct super_block *, int); 425 int (*quota_sync)(struct super_block *, int);
426 int (*set_info)(struct super_block *, int, struct qc_info *); 426 int (*set_info)(struct super_block *, int, struct qc_info *);
427 int (*get_dqblk)(struct super_block *, struct kqid, struct qc_dqblk *); 427 int (*get_dqblk)(struct super_block *, struct kqid, struct qc_dqblk *);
428 int (*get_nextdqblk)(struct super_block *, struct kqid *,
429 struct qc_dqblk *);
428 int (*set_dqblk)(struct super_block *, struct kqid, struct qc_dqblk *); 430 int (*set_dqblk)(struct super_block *, struct kqid, struct qc_dqblk *);
429 int (*get_state)(struct super_block *, struct qc_state *); 431 int (*get_state)(struct super_block *, struct qc_state *);
430 int (*rm_xquota)(struct super_block *, unsigned int); 432 int (*rm_xquota)(struct super_block *, unsigned int);
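
The new callback takes the kqid by pointer because it is an in/out argument: the filesystem looks up the next active quota at or above the passed qid and writes back the qid it found. A sketch of the contract from the caller's side (sketch_next_quota is a hypothetical helper, assuming only the ops table above):

static int
sketch_next_quota(struct super_block *sb, struct kqid *qid,
		  struct qc_dqblk *qdq)
{
	if (!sb->s_qcop->get_nextdqblk)
		return -ENOSYS;		/* filesystem has no enumerator */
	return sb->s_qcop->get_nextdqblk(sb, qid, qdq);
}
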
diff --git a/include/uapi/linux/dqblk_xfs.h b/include/uapi/linux/dqblk_xfs.h
index dcd75cc26196..11b3b31faf14 100644
--- a/include/uapi/linux/dqblk_xfs.h
+++ b/include/uapi/linux/dqblk_xfs.h
@@ -39,6 +39,7 @@
39#define Q_XQUOTARM XQM_CMD(6) /* free disk space used by dquots */ 39#define Q_XQUOTARM XQM_CMD(6) /* free disk space used by dquots */
40#define Q_XQUOTASYNC XQM_CMD(7) /* delalloc flush, updates dquots */ 40#define Q_XQUOTASYNC XQM_CMD(7) /* delalloc flush, updates dquots */
41#define Q_XGETQSTATV XQM_CMD(8) /* newer version of get quota */ 41#define Q_XGETQSTATV XQM_CMD(8) /* newer version of get quota */
42#define Q_XGETNEXTQUOTA XQM_CMD(9) /* get disk limits and usage >= ID */
42 43
43/* 44/*
44 * fs_disk_quota structure: 45 * fs_disk_quota structure:
diff --git a/include/uapi/linux/quota.h b/include/uapi/linux/quota.h
index 9c95b2c1c88a..38baddb807f5 100644
--- a/include/uapi/linux/quota.h
+++ b/include/uapi/linux/quota.h
@@ -71,6 +71,7 @@
71#define Q_SETINFO 0x800006 /* set information about quota files */ 71#define Q_SETINFO 0x800006 /* set information about quota files */
72#define Q_GETQUOTA 0x800007 /* get user quota structure */ 72#define Q_GETQUOTA 0x800007 /* get user quota structure */
73#define Q_SETQUOTA 0x800008 /* set user quota structure */ 73#define Q_SETQUOTA 0x800008 /* set user quota structure */
74#define Q_GETNEXTQUOTA 0x800009 /* get disk limits and usage >= ID */
74 75
75/* Quota format type IDs */ 76/* Quota format type IDs */
76#define QFMT_VFS_OLD 1 77#define QFMT_VFS_OLD 1
@@ -119,6 +120,19 @@ struct if_dqblk {
119 __u32 dqb_valid; 120 __u32 dqb_valid;
120}; 121};
121 122
123struct if_nextdqblk {
124 __u64 dqb_bhardlimit;
125 __u64 dqb_bsoftlimit;
126 __u64 dqb_curspace;
127 __u64 dqb_ihardlimit;
128 __u64 dqb_isoftlimit;
129 __u64 dqb_curinodes;
130 __u64 dqb_btime;
131 __u64 dqb_itime;
132 __u32 dqb_valid;
133 __u32 dqb_id;
134};
135
122/* 136/*
123 * Structure used for setting quota information about file via quotactl 137 * Structure used for setting quota information about file via quotactl
124 * Following flags are used to specify which fields are valid 138 * Following flags are used to specify which fields are valid
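
Together, the new command and structure let userspace enumerate quotas without probing IDs one by one: the kernel fills dqb_id with the ID it actually found. A hypothetical userspace sketch, assuming the usual quotactl(2) calling convention and a kernel that carries this patch:

#include <stdint.h>
#include <stdio.h>
#include <sys/quota.h>		/* quotactl(), QCMD(), USRQUOTA */

#ifndef Q_GETNEXTQUOTA
#define Q_GETNEXTQUOTA	0x800009	/* value from the hunk above */
/* layout mirrors struct if_nextdqblk from the hunk above */
struct if_nextdqblk {
	uint64_t dqb_bhardlimit, dqb_bsoftlimit, dqb_curspace;
	uint64_t dqb_ihardlimit, dqb_isoftlimit, dqb_curinodes;
	uint64_t dqb_btime, dqb_itime;
	uint32_t dqb_valid, dqb_id;
};
#endif

/* print every active user quota on 'dev', starting from ID 0 */
static void
walk_user_quotas(const char *dev)
{
	struct if_nextdqblk	q;
	unsigned int		id = 0;

	while (quotactl(QCMD(Q_GETNEXTQUOTA, USRQUOTA), dev, id,
			(caddr_t)&q) == 0) {
		printf("id %u: %llu bytes in use\n", q.dqb_id,
		       (unsigned long long)q.dqb_curspace);
		id = q.dqb_id + 1;	/* advance past the ID we got */
	}
	/* quotactl() fails (e.g. ENOENT) once no higher ID exists */
}
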