 fs/dax.c                          |   9
 fs/direct-io.c                    |   9
 fs/ext4/ext4.h                    |  30
 fs/ext4/inode.c                   |  24
 fs/ext4/page-io.c                 |  10
 fs/ocfs2/aops.c                   |   7
 fs/quota/quota.c                  |  66
 fs/xfs/libxfs/xfs_alloc_btree.c   |   2
 fs/xfs/libxfs/xfs_attr_sf.h       |  16
 fs/xfs/libxfs/xfs_bmap_btree.c    |   1
 fs/xfs/libxfs/xfs_btree.c         |  32
 fs/xfs/libxfs/xfs_da_format.h     |  16
 fs/xfs/libxfs/xfs_ialloc_btree.c  |  12
 fs/xfs/libxfs/xfs_inode_fork.c    |   1
 fs/xfs/libxfs/xfs_log_format.h    |   2
 fs/xfs/libxfs/xfs_quota_defs.h    |   3
 fs/xfs/libxfs/xfs_rtbitmap.c      |  30
 fs/xfs/libxfs/xfs_sb.h            |   1
 fs/xfs/libxfs/xfs_shared.h        |   1
 fs/xfs/xfs_aops.c                 | 274
 fs/xfs/xfs_bmap_util.c            |   4
 fs/xfs/xfs_dquot.c                | 129
 fs/xfs/xfs_file.c                 |  82
 fs/xfs/xfs_fsops.h                |   1
 fs/xfs/xfs_inode.h                |   2
 fs/xfs/xfs_log_recover.c          | 282
 fs/xfs/xfs_mount.h                |   1
 fs/xfs/xfs_qm.c                   |  55
 fs/xfs/xfs_qm.h                   |  48
 fs/xfs/xfs_qm_syscalls.c          |  27
 fs/xfs/xfs_quotaops.c             |  36
 fs/xfs/xfs_trace.h                |   9
 fs/xfs/xfs_trans_ail.c            |   5
 fs/xfs/xfs_trans_dquot.c          |  15
 include/linux/fs.h                |   2
 include/linux/quota.h             |   2
 include/uapi/linux/dqblk_xfs.h    |   1
 include/uapi/linux/quota.h        |  14
 38 files changed, 811 insertions(+), 450 deletions(-)
diff --git a/fs/dax.c b/fs/dax.c
index 4fd6b0c5c6b5..e38b2c589b54 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -267,8 +267,13 @@ ssize_t dax_do_io(struct kiocb *iocb, struct inode *inode,
     if ((flags & DIO_LOCKING) && iov_iter_rw(iter) == READ)
         inode_unlock(inode);
 
-    if ((retval > 0) && end_io)
-        end_io(iocb, pos, retval, bh.b_private);
+    if (end_io) {
+        int err;
+
+        err = end_io(iocb, pos, retval, bh.b_private);
+        if (err)
+            retval = err;
+    }
 
     if (!(flags & DIO_SKIP_DIO_COUNT))
         inode_dio_end(inode);
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 1b2f7ffc8b84..9c6f885cc518 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -253,8 +253,13 @@ static ssize_t dio_complete(struct dio *dio, loff_t offset, ssize_t ret,
     if (ret == 0)
         ret = transferred;
 
-    if (dio->end_io && dio->result)
-        dio->end_io(dio->iocb, offset, transferred, dio->private);
+    if (dio->end_io) {
+        int err;
+
+        err = dio->end_io(dio->iocb, offset, ret, dio->private);
+        if (err)
+            ret = err;
+    }
 
     if (!(dio->flags & DIO_SKIP_DIO_COUNT))
         inode_dio_end(dio->inode);
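
These two hunks define the new completion contract shared by dax_do_io() and __blockdev_direct_IO(): the ->end_io callback now returns an int, is invoked even when the I/O failed or wrote nothing, and a nonzero return overrides the final result. A minimal sketch of a callback written against that contract (hypothetical filesystem code, not part of this patch):

    static int example_end_io(struct kiocb *iocb, loff_t offset,
                              ssize_t size, void *private)
    {
        if (size <= 0)      /* failed or empty write: clean up only */
            return 0;       /* nothing to convert or extend */

        /* convert unwritten extents, update on-disk i_size, etc. */
        return 0;           /* nonzero here would replace the dio result */
    }
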
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 0662b285dc8a..56c12df107ab 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1504,15 +1504,6 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
 		 ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count));
 }
 
-static inline void ext4_set_io_unwritten_flag(struct inode *inode,
-                                              struct ext4_io_end *io_end)
-{
-    if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
-        io_end->flag |= EXT4_IO_END_UNWRITTEN;
-        atomic_inc(&EXT4_I(inode)->i_unwritten);
-    }
-}
-
 static inline ext4_io_end_t *ext4_inode_aio(struct inode *inode)
 {
     return inode->i_private;
@@ -3293,6 +3284,27 @@ extern struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ];
 extern int ext4_resize_begin(struct super_block *sb);
 extern void ext4_resize_end(struct super_block *sb);
 
+static inline void ext4_set_io_unwritten_flag(struct inode *inode,
+                                              struct ext4_io_end *io_end)
+{
+    if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
+        io_end->flag |= EXT4_IO_END_UNWRITTEN;
+        atomic_inc(&EXT4_I(inode)->i_unwritten);
+    }
+}
+
+static inline void ext4_clear_io_unwritten_flag(ext4_io_end_t *io_end)
+{
+    struct inode *inode = io_end->inode;
+
+    if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
+        io_end->flag &= ~EXT4_IO_END_UNWRITTEN;
+        /* Wake up anyone waiting on unwritten extent conversion */
+        if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten))
+            wake_up_all(ext4_ioend_wq(inode));
+    }
+}
+
 #endif	/* __KERNEL__ */
 
 #define EFSBADCRC	EBADMSG		/* Bad CRC detected */
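
Moving both helpers into ext4.h pairs the setter with its new inverse. The clear side wakes ext4_ioend_wq() on the final decrement because conversion waiters sleep on that queue until i_unwritten drains; a hedged sketch of the waiting side (modeled on ext4's unwritten-wait helper of this era, not part of this diff):

    static void example_unwritten_wait(struct inode *inode)
    {
        /* sleep until every EXT4_IO_END_UNWRITTEN io_end is cleared */
        wait_event(*ext4_ioend_wq(inode),
                   atomic_read(&EXT4_I(inode)->i_unwritten) == 0);
    }
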
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 83bc8bfb3bea..2b98171a9432 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3161,14 +3161,14 @@ out:
 }
 #endif
 
-static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
-                            ssize_t size, void *private)
+static int ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
+                           ssize_t size, void *private)
 {
     ext4_io_end_t *io_end = iocb->private;
 
     /* if not async direct IO just return */
     if (!io_end)
-        return;
+        return 0;
 
     ext_debug("ext4_end_io_dio(): io_end 0x%p "
               "for inode %lu, iocb 0x%p, offset %llu, size %zd\n",
@@ -3176,9 +3176,19 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
               size);
 
     iocb->private = NULL;
+    /*
+     * Error during AIO DIO. We cannot convert unwritten extents as the
+     * data was not written. Just clear the unwritten flag and drop io_end.
+     */
+    if (size <= 0) {
+        ext4_clear_io_unwritten_flag(io_end);
+        size = 0;
+    }
     io_end->offset = offset;
     io_end->size = size;
     ext4_put_io_end(io_end);
+
+    return 0;
 }
 
 /*
@@ -3301,16 +3311,6 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
     if (io_end) {
         ext4_inode_aio_set(inode, NULL);
         ext4_put_io_end(io_end);
-        /*
-         * When no IO was submitted ext4_end_io_dio() was not
-         * called so we have to put iocb's reference.
-         */
-        if (ret <= 0 && ret != -EIOCBQUEUED && iocb->private) {
-            WARN_ON(iocb->private != io_end);
-            WARN_ON(io_end->flag & EXT4_IO_END_UNWRITTEN);
-            ext4_put_io_end(io_end);
-            iocb->private = NULL;
-        }
     }
     if (ret > 0 && !overwrite && ext4_test_inode_state(inode,
             EXT4_STATE_DIO_UNWRITTEN)) {
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 090b3498638e..f49a87c4fb63 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -139,16 +139,6 @@ static void ext4_release_io_end(ext4_io_end_t *io_end)
     kmem_cache_free(io_end_cachep, io_end);
 }
 
-static void ext4_clear_io_unwritten_flag(ext4_io_end_t *io_end)
-{
-    struct inode *inode = io_end->inode;
-
-    io_end->flag &= ~EXT4_IO_END_UNWRITTEN;
-    /* Wake up anyone waiting on unwritten extent conversion */
-    if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten))
-        wake_up_all(ext4_ioend_wq(inode));
-}
-
 /*
  * Check a range of space and convert unwritten extents to written. Note that
  * we are protected from truncate touching same part of extent tree by the
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 794fd1587f34..5dcc5f5a842e 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -620,7 +620,7 @@ bail:
  * particularly interested in the aio/dio case. We use the rw_lock DLM lock
  * to protect io on one node from truncation on another.
  */
-static void ocfs2_dio_end_io(struct kiocb *iocb,
+static int ocfs2_dio_end_io(struct kiocb *iocb,
                              loff_t offset,
                              ssize_t bytes,
                              void *private)
@@ -628,6 +628,9 @@ static void ocfs2_dio_end_io(struct kiocb *iocb,
     struct inode *inode = file_inode(iocb->ki_filp);
     int level;
 
+    if (bytes <= 0)
+        return 0;
+
     /* this io's submitter should not have unlocked this before we could */
     BUG_ON(!ocfs2_iocb_is_rw_locked(iocb));
 
@@ -644,6 +647,8 @@ static void ocfs2_dio_end_io(struct kiocb *iocb,
         level = ocfs2_iocb_rw_locked_level(iocb);
         ocfs2_rw_unlock(inode, level);
     }
+
+    return 0;
 }
 
 static int ocfs2_releasepage(struct page *page, gfp_t wait)
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index 3746367098fd..0ebc90496525 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -79,7 +79,7 @@ unsigned int qtype_enforce_flag(int type)
     return 0;
 }
 
-static int quota_quotaon(struct super_block *sb, int type, int cmd, qid_t id,
+static int quota_quotaon(struct super_block *sb, int type, qid_t id,
                          struct path *path)
 {
     if (!sb->s_qcop->quota_on && !sb->s_qcop->quota_enable)
@@ -222,6 +222,34 @@ static int quota_getquota(struct super_block *sb, int type, qid_t id,
     return 0;
 }
 
+/*
+ * Return quota for next active quota >= this id, if any exists,
+ * otherwise return -ESRCH via ->get_nextdqblk
+ */
+static int quota_getnextquota(struct super_block *sb, int type, qid_t id,
+                              void __user *addr)
+{
+    struct kqid qid;
+    struct qc_dqblk fdq;
+    struct if_nextdqblk idq;
+    int ret;
+
+    if (!sb->s_qcop->get_nextdqblk)
+        return -ENOSYS;
+    qid = make_kqid(current_user_ns(), type, id);
+    if (!qid_valid(qid))
+        return -EINVAL;
+    ret = sb->s_qcop->get_nextdqblk(sb, &qid, &fdq);
+    if (ret)
+        return ret;
+    /* struct if_nextdqblk is a superset of struct if_dqblk */
+    copy_to_if_dqblk((struct if_dqblk *)&idq, &fdq);
+    idq.dqb_id = from_kqid(current_user_ns(), qid);
+    if (copy_to_user(addr, &idq, sizeof(idq)))
+        return -EFAULT;
+    return 0;
+}
+
 static void copy_from_if_dqblk(struct qc_dqblk *dst, struct if_dqblk *src)
 {
     dst->d_spc_hardlimit = qbtos(src->dqb_bhardlimit);
@@ -625,6 +653,34 @@ static int quota_getxquota(struct super_block *sb, int type, qid_t id,
     return ret;
 }
 
+/*
+ * Return quota for next active quota >= this id, if any exists,
+ * otherwise return -ESRCH via ->get_nextdqblk.
+ */
+static int quota_getnextxquota(struct super_block *sb, int type, qid_t id,
+                               void __user *addr)
+{
+    struct fs_disk_quota fdq;
+    struct qc_dqblk qdq;
+    struct kqid qid;
+    qid_t id_out;
+    int ret;
+
+    if (!sb->s_qcop->get_nextdqblk)
+        return -ENOSYS;
+    qid = make_kqid(current_user_ns(), type, id);
+    if (!qid_valid(qid))
+        return -EINVAL;
+    ret = sb->s_qcop->get_nextdqblk(sb, &qid, &qdq);
+    if (ret)
+        return ret;
+    id_out = from_kqid(current_user_ns(), qid);
+    copy_to_xfs_dqblk(&fdq, &qdq, type, id_out);
+    if (copy_to_user(addr, &fdq, sizeof(fdq)))
+        return -EFAULT;
+    return ret;
+}
+
 static int quota_rmxquota(struct super_block *sb, void __user *addr)
 {
     __u32 flags;
@@ -659,7 +715,7 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id,
 
     switch (cmd) {
     case Q_QUOTAON:
-        return quota_quotaon(sb, type, cmd, id, path);
+        return quota_quotaon(sb, type, id, path);
     case Q_QUOTAOFF:
         return quota_quotaoff(sb, type);
     case Q_GETFMT:
@@ -670,6 +726,8 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id,
         return quota_setinfo(sb, type, addr);
     case Q_GETQUOTA:
         return quota_getquota(sb, type, id, addr);
+    case Q_GETNEXTQUOTA:
+        return quota_getnextquota(sb, type, id, addr);
     case Q_SETQUOTA:
         return quota_setquota(sb, type, id, addr);
     case Q_SYNC:
@@ -690,6 +748,8 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id,
         return quota_setxquota(sb, type, id, addr);
     case Q_XGETQUOTA:
         return quota_getxquota(sb, type, id, addr);
+    case Q_XGETNEXTQUOTA:
+        return quota_getnextxquota(sb, type, id, addr);
     case Q_XQUOTASYNC:
         if (sb->s_flags & MS_RDONLY)
             return -EROFS;
@@ -708,10 +768,12 @@ static int quotactl_cmd_write(int cmd)
     switch (cmd) {
     case Q_GETFMT:
     case Q_GETINFO:
+    case Q_GETNEXTQUOTA:
     case Q_SYNC:
     case Q_XGETQSTAT:
     case Q_XGETQSTATV:
     case Q_XGETQUOTA:
+    case Q_XGETNEXTQUOTA:
     case Q_XQUOTASYNC:
         return 0;
     }
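
Both new commands are read-only, which is why they join the list in quotactl_cmd_write() above. A hedged userspace sketch of the intended iteration pattern (the device path is illustrative, and Q_GETNEXTQUOTA plus struct if_nextdqblk come from the uapi headers this series extends):

    #include <sys/quota.h>
    #include <errno.h>

    struct if_nextdqblk idq;
    qid_t id = 0;

    /* visit every active user quota on the (example) device */
    while (quotactl(QCMD(Q_GETNEXTQUOTA, USRQUOTA), "/dev/sda1",
                    id, (void *)&idq) == 0) {
        /* idq.dqb_id is the id actually found, >= the id requested */
        id = idq.dqb_id + 1;        /* resume just past it */
    }
    /* the walk terminates with errno == ESRCH: no higher active id */
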
diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c
index 444626ddbd1b..d9b42425291e 100644
--- a/fs/xfs/libxfs/xfs_alloc_btree.c
+++ b/fs/xfs/libxfs/xfs_alloc_btree.c
@@ -118,8 +118,6 @@ xfs_allocbt_free_block(
     xfs_extent_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1,
                            XFS_EXTENT_BUSY_SKIP_DISCARD);
     xfs_trans_agbtree_delta(cur->bc_tp, -1);
-
-    xfs_trans_binval(cur->bc_tp, bp);
     return 0;
 }
 
diff --git a/fs/xfs/libxfs/xfs_attr_sf.h b/fs/xfs/libxfs/xfs_attr_sf.h
index 919756e3ba53..90928bbe693c 100644
--- a/fs/xfs/libxfs/xfs_attr_sf.h
+++ b/fs/xfs/libxfs/xfs_attr_sf.h
@@ -24,22 +24,6 @@
  * Small attribute lists are packed as tightly as possible so as
  * to fit into the literal area of the inode.
  */
-
-/*
- * Entries are packed toward the top as tight as possible.
- */
-typedef struct xfs_attr_shortform {
-    struct xfs_attr_sf_hdr {    /* constant-structure header block */
-        __be16  totsize;        /* total bytes in shortform list */
-        __u8    count;          /* count of active entries */
-    } hdr;
-    struct xfs_attr_sf_entry {
-        __uint8_t namelen;      /* actual length of name (no NULL) */
-        __uint8_t valuelen;     /* actual length of value (no NULL) */
-        __uint8_t flags;        /* flags bits (see xfs_attr_leaf.h) */
-        __uint8_t nameval[1];   /* name & value bytes concatenated */
-    } list[1];                  /* variable sized array */
-} xfs_attr_shortform_t;
 typedef struct xfs_attr_sf_hdr xfs_attr_sf_hdr_t;
 typedef struct xfs_attr_sf_entry xfs_attr_sf_entry_t;
 
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
index 1637c37bfbaa..e37508ae589b 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.c
+++ b/fs/xfs/libxfs/xfs_bmap_btree.c
@@ -531,7 +531,6 @@ xfs_bmbt_free_block(
 
     xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
     xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
-    xfs_trans_binval(tp, bp);
     return 0;
 }
 
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index a0eb18ce3ad3..1f88e1ce770f 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -294,6 +294,21 @@ xfs_btree_sblock_verify_crc(
     return true;
 }
 
+static int
+xfs_btree_free_block(
+    struct xfs_btree_cur    *cur,
+    struct xfs_buf          *bp)
+{
+    int                     error;
+
+    error = cur->bc_ops->free_block(cur, bp);
+    if (!error) {
+        xfs_trans_binval(cur->bc_tp, bp);
+        XFS_BTREE_STATS_INC(cur, free);
+    }
+    return error;
+}
+
 /*
  * Delete the btree cursor.
  */
@@ -3209,6 +3224,7 @@ xfs_btree_kill_iroot(
     int                     level;
     int                     index;
     int                     numrecs;
+    int                     error;
 #ifdef DEBUG
     union xfs_btree_ptr     ptr;
     int                     i;
@@ -3272,8 +3288,6 @@ xfs_btree_kill_iroot(
     cpp = xfs_btree_ptr_addr(cur, 1, cblock);
 #ifdef DEBUG
     for (i = 0; i < numrecs; i++) {
-        int             error;
-
         error = xfs_btree_check_ptr(cur, cpp, i, level - 1);
         if (error) {
             XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
@@ -3283,8 +3297,11 @@ xfs_btree_kill_iroot(
 #endif
     xfs_btree_copy_ptrs(cur, pp, cpp, numrecs);
 
-    cur->bc_ops->free_block(cur, cbp);
-    XFS_BTREE_STATS_INC(cur, free);
+    error = xfs_btree_free_block(cur, cbp);
+    if (error) {
+        XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+        return error;
+    }
 
     cur->bc_bufs[level - 1] = NULL;
     be16_add_cpu(&block->bb_level, -1);
@@ -3317,14 +3334,12 @@ xfs_btree_kill_root(
      */
     cur->bc_ops->set_root(cur, newroot, -1);
 
-    error = cur->bc_ops->free_block(cur, bp);
+    error = xfs_btree_free_block(cur, bp);
     if (error) {
         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
         return error;
     }
 
-    XFS_BTREE_STATS_INC(cur, free);
-
     cur->bc_bufs[level] = NULL;
     cur->bc_ra[level] = 0;
     cur->bc_nlevels--;
@@ -3830,10 +3845,9 @@ xfs_btree_delrec(
     }
 
     /* Free the deleted block. */
-    error = cur->bc_ops->free_block(cur, rbp);
+    error = xfs_btree_free_block(cur, rbp);
     if (error)
         goto error0;
-    XFS_BTREE_STATS_INC(cur, free);
 
     /*
      * If we joined with the left neighbor, set the buffer in the
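
Beyond deduplicating the xfs_trans_binval()/stats tail, the helper closes an error-handling hole: xfs_btree_kill_iroot() previously called cur->bc_ops->free_block() as a bare statement and dropped any failure, while the other callers each open-coded the stats bump. Every caller now funnels through one checked path, in the shape:

    error = xfs_btree_free_block(cur, cbp);     /* free + binval + stats */
    if (error) {
        XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
        return error;                           /* now propagated, not ignored */
    }
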
diff --git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h
index b14bbd6bb05f..8d4d8bce41bf 100644
--- a/fs/xfs/libxfs/xfs_da_format.h
+++ b/fs/xfs/libxfs/xfs_da_format.h
@@ -641,6 +641,22 @@ xfs_dir2_block_leaf_p(struct xfs_dir2_block_tail *btp)
  */
 #define XFS_ATTR_LEAF_MAPSIZE	3	/* how many freespace slots */
 
+/*
+ * Entries are packed toward the top as tight as possible.
+ */
+typedef struct xfs_attr_shortform {
+    struct xfs_attr_sf_hdr {    /* constant-structure header block */
+        __be16  totsize;        /* total bytes in shortform list */
+        __u8    count;          /* count of active entries */
+    } hdr;
+    struct xfs_attr_sf_entry {
+        __uint8_t namelen;      /* actual length of name (no NULL) */
+        __uint8_t valuelen;     /* actual length of value (no NULL) */
+        __uint8_t flags;        /* flags bits (see xfs_attr_leaf.h) */
+        __uint8_t nameval[1];   /* name & value bytes concatenated */
+    } list[1];                  /* variable sized array */
+} xfs_attr_shortform_t;
+
 typedef struct xfs_attr_leaf_map {	/* RLE map of free bytes */
     __be16	base;			/* base of free region */
     __be16	size;			/* length of free region */
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c
index c679f3c05b63..89c21d771e35 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.c
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
@@ -125,16 +125,8 @@ xfs_inobt_free_block(
     struct xfs_btree_cur    *cur,
     struct xfs_buf          *bp)
 {
-    xfs_fsblock_t           fsbno;
-    int                     error;
-
-    fsbno = XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(bp));
-    error = xfs_free_extent(cur->bc_tp, fsbno, 1);
-    if (error)
-        return error;
-
-    xfs_trans_binval(cur->bc_tp, bp);
-    return error;
+    return xfs_free_extent(cur->bc_tp,
+            XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(bp)), 1);
 }
 
 STATIC int
diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
index 0bf1c747439d..11faf7df14c8 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.c
+++ b/fs/xfs/libxfs/xfs_inode_fork.c
@@ -31,6 +31,7 @@
 #include "xfs_error.h"
 #include "xfs_trace.h"
 #include "xfs_attr_sf.h"
+#include "xfs_da_format.h"
 
 kmem_zone_t *xfs_ifork_zone;
 
diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h
index 03f90b99b8c8..d54a8018b079 100644
--- a/fs/xfs/libxfs/xfs_log_format.h
+++ b/fs/xfs/libxfs/xfs_log_format.h
@@ -496,6 +496,8 @@ enum xfs_blft {
     XFS_BLFT_ATTR_LEAF_BUF,
     XFS_BLFT_ATTR_RMT_BUF,
     XFS_BLFT_SB_BUF,
+    XFS_BLFT_RTBITMAP_BUF,
+    XFS_BLFT_RTSUMMARY_BUF,
     XFS_BLFT_MAX_BUF = (1 << XFS_BLFT_BITS),
 };
 
diff --git a/fs/xfs/libxfs/xfs_quota_defs.h b/fs/xfs/libxfs/xfs_quota_defs.h
index f51078f1e92a..8eed51275bb3 100644
--- a/fs/xfs/libxfs/xfs_quota_defs.h
+++ b/fs/xfs/libxfs/xfs_quota_defs.h
@@ -37,7 +37,7 @@ typedef __uint16_t xfs_qwarncnt_t;
 #define XFS_DQ_PROJ		0x0002		/* project quota */
 #define XFS_DQ_GROUP		0x0004		/* a group quota */
 #define XFS_DQ_DIRTY		0x0008		/* dquot is dirty */
-#define XFS_DQ_FREEING		0x0010		/* dquot is beeing torn down */
+#define XFS_DQ_FREEING		0x0010		/* dquot is being torn down */
 
 #define XFS_DQ_ALLTYPES		(XFS_DQ_USER|XFS_DQ_PROJ|XFS_DQ_GROUP)
 
@@ -116,6 +116,7 @@ typedef __uint16_t xfs_qwarncnt_t;
 #define XFS_QMOPT_DQREPAIR	0x0001000 /* repair dquot if damaged */
 #define XFS_QMOPT_GQUOTA	0x0002000 /* group dquot requested */
 #define XFS_QMOPT_ENOSPC	0x0004000 /* enospc instead of edquot (prj) */
+#define XFS_QMOPT_DQNEXT	0x0008000 /* return next dquot >= this ID */
 
 /*
  * flags to xfs_trans_mod_dquot to indicate which field needs to be
diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c
index acc71dd36a2b..951c044e24e4 100644
--- a/fs/xfs/libxfs/xfs_rtbitmap.c
+++ b/fs/xfs/libxfs/xfs_rtbitmap.c
@@ -42,6 +42,31 @@
  */
 
 /*
+ * Real time buffers need verifiers to avoid runtime warnings during IO.
+ * We don't have anything to verify, however, so these are just dummy
+ * operations.
+ */
+static void
+xfs_rtbuf_verify_read(
+    struct xfs_buf  *bp)
+{
+    return;
+}
+
+static void
+xfs_rtbuf_verify_write(
+    struct xfs_buf  *bp)
+{
+    return;
+}
+
+const struct xfs_buf_ops xfs_rtbuf_ops = {
+    .name = "rtbuf",
+    .verify_read = xfs_rtbuf_verify_read,
+    .verify_write = xfs_rtbuf_verify_write,
+};
+
+/*
  * Get a buffer for the bitmap or summary file block specified.
  * The buffer is returned read and locked.
  */
@@ -68,9 +93,12 @@ xfs_rtbuf_get(
     ASSERT(map.br_startblock != NULLFSBLOCK);
     error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
                                XFS_FSB_TO_DADDR(mp, map.br_startblock),
-                               mp->m_bsize, 0, &bp, NULL);
+                               mp->m_bsize, 0, &bp, &xfs_rtbuf_ops);
     if (error)
         return error;
+
+    xfs_trans_buf_set_type(tp, bp, issum ? XFS_BLFT_RTSUMMARY_BUF
+                                         : XFS_BLFT_RTBITMAP_BUF);
     *bpp = bp;
     return 0;
 }
diff --git a/fs/xfs/libxfs/xfs_sb.h b/fs/xfs/libxfs/xfs_sb.h
index b25bb9a343f3..961e6475a309 100644
--- a/fs/xfs/libxfs/xfs_sb.h
+++ b/fs/xfs/libxfs/xfs_sb.h
@@ -27,7 +27,6 @@ extern struct xfs_perag *xfs_perag_get_tag(struct xfs_mount *, xfs_agnumber_t,
 extern void	xfs_perag_put(struct xfs_perag *pag);
 extern int	xfs_initialize_perag_data(struct xfs_mount *, xfs_agnumber_t);
 
-extern void	xfs_sb_calc_crc(struct xfs_buf *bp);
 extern void	xfs_log_sb(struct xfs_trans *tp);
 extern int	xfs_sync_sb(struct xfs_mount *mp, bool wait);
 extern void	xfs_sb_mount_common(struct xfs_mount *mp, struct xfs_sb *sbp);
diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h
index 15c3ceb845b9..81ac870834da 100644
--- a/fs/xfs/libxfs/xfs_shared.h
+++ b/fs/xfs/libxfs/xfs_shared.h
@@ -53,6 +53,7 @@ extern const struct xfs_buf_ops xfs_dquot_buf_ra_ops;
 extern const struct xfs_buf_ops xfs_sb_buf_ops;
 extern const struct xfs_buf_ops xfs_sb_quiet_buf_ops;
 extern const struct xfs_buf_ops xfs_symlink_buf_ops;
+extern const struct xfs_buf_ops xfs_rtbuf_ops;
 
 /*
  * Transaction types. Used to distinguish types of buffers. These never reach
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 379c089fb051..14ac9822b303 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -36,6 +36,10 @@
 #include <linux/pagevec.h>
 #include <linux/writeback.h>
 
+/* flags for direct write completions */
+#define XFS_DIO_FLAG_UNWRITTEN	(1 << 0)
+#define XFS_DIO_FLAG_APPEND	(1 << 1)
+
 void
 xfs_count_page_state(
     struct page         *page,
@@ -214,10 +218,12 @@ xfs_end_io(
     struct xfs_inode *ip = XFS_I(ioend->io_inode);
     int             error = 0;
 
-    if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+    /*
+     * Set an error if the mount has shut down and proceed with end I/O
+     * processing so it can perform whatever cleanups are necessary.
+     */
+    if (XFS_FORCED_SHUTDOWN(ip->i_mount))
         ioend->io_error = -EIO;
-        goto done;
-    }
 
     /*
      * For unwritten extents we need to issue transactions to convert a
@@ -1238,27 +1244,8 @@ xfs_vm_releasepage(
 }
 
 /*
- * When we map a DIO buffer, we may need to attach an ioend that describes the
- * type of write IO we are doing. This passes to the completion function the
- * operations it needs to perform. If the mapping is for an overwrite wholly
- * within the EOF then we don't need an ioend and so we don't allocate one.
- * This avoids the unnecessary overhead of allocating and freeing ioends for
- * workloads that don't require transactions on IO completion.
- *
- * If we get multiple mappings in a single IO, we might be mapping different
- * types. But because the direct IO can only have a single private pointer, we
- * need to ensure that:
- *
- * a) i) the ioend spans the entire region of unwritten mappings; or
- *    ii) the ioend spans all the mappings that cross or are beyond EOF; and
- * b) if it contains unwritten extents, it is *permanently* marked as such
- *
- * We could do this by chaining ioends like buffered IO does, but we only
- * actually get one IO completion callback from the direct IO, and that spans
- * the entire IO regardless of how many mappings and IOs are needed to complete
- * the DIO. There is only going to be one reference to the ioend and its life
- * cycle is constrained by the DIO completion code. hence we don't need
- * reference counting here.
+ * When we map a DIO buffer, we may need to pass flags to
+ * xfs_end_io_direct_write to tell it what kind of write IO we are doing.
  *
  * Note that for DIO, an IO to the highest supported file block offset (i.e.
  * 2^63 - 1FSB bytes) will result in the offset + count overflowing a signed 64
@@ -1266,68 +1253,26 @@ xfs_vm_releasepage(
  * extending the file size. We won't know for sure until IO completion is run
  * and the actual max write offset is communicated to the IO completion
  * routine.
- *
- * For DAX page faults, we are preparing to never see unwritten extents here,
- * nor should we ever extend the inode size. Hence we will soon have nothing to
- * do here for this case, ensuring we don't have to provide an IO completion
- * callback to free an ioend that we don't actually need for a fault into the
- * page at offset (2^63 - 1FSB) bytes.
  */
-
 static void
 xfs_map_direct(
     struct inode            *inode,
     struct buffer_head      *bh_result,
     struct xfs_bmbt_irec    *imap,
-    xfs_off_t               offset,
-    bool                    dax_fault)
+    xfs_off_t               offset)
 {
-    struct xfs_ioend        *ioend;
+    uintptr_t               *flags = (uintptr_t *)&bh_result->b_private;
     xfs_off_t               size = bh_result->b_size;
-    int                     type;
 
-    if (ISUNWRITTEN(imap))
-        type = XFS_IO_UNWRITTEN;
-    else
-        type = XFS_IO_OVERWRITE;
-
-    trace_xfs_gbmap_direct(XFS_I(inode), offset, size, type, imap);
-
-    if (dax_fault) {
-        ASSERT(type == XFS_IO_OVERWRITE);
-        trace_xfs_gbmap_direct_none(XFS_I(inode), offset, size, type,
-                                    imap);
-        return;
-    }
-
-    if (bh_result->b_private) {
-        ioend = bh_result->b_private;
-        ASSERT(ioend->io_size > 0);
-        ASSERT(offset >= ioend->io_offset);
-        if (offset + size > ioend->io_offset + ioend->io_size)
-            ioend->io_size = offset - ioend->io_offset + size;
-
-        if (type == XFS_IO_UNWRITTEN && type != ioend->io_type)
-            ioend->io_type = XFS_IO_UNWRITTEN;
-
-        trace_xfs_gbmap_direct_update(XFS_I(inode), ioend->io_offset,
-                                      ioend->io_size, ioend->io_type,
-                                      imap);
-    } else if (type == XFS_IO_UNWRITTEN ||
-               offset + size > i_size_read(inode) ||
-               offset + size < 0) {
-        ioend = xfs_alloc_ioend(inode, type);
-        ioend->io_offset = offset;
-        ioend->io_size = size;
+    trace_xfs_get_blocks_map_direct(XFS_I(inode), offset, size,
+        ISUNWRITTEN(imap) ? XFS_IO_UNWRITTEN : XFS_IO_OVERWRITE, imap);
 
-        bh_result->b_private = ioend;
+    if (ISUNWRITTEN(imap)) {
+        *flags |= XFS_DIO_FLAG_UNWRITTEN;
+        set_buffer_defer_completion(bh_result);
+    } else if (offset + size > i_size_read(inode) || offset + size < 0) {
+        *flags |= XFS_DIO_FLAG_APPEND;
         set_buffer_defer_completion(bh_result);
-
-        trace_xfs_gbmap_direct_new(XFS_I(inode), offset, size, type,
-                                   imap);
-    } else {
-        trace_xfs_gbmap_direct_none(XFS_I(inode), offset, size, type,
-                                    imap);
     }
 }
 
@@ -1498,9 +1443,12 @@ __xfs_get_blocks(
         if (ISUNWRITTEN(&imap))
             set_buffer_unwritten(bh_result);
         /* direct IO needs special help */
-        if (create && direct)
-            xfs_map_direct(inode, bh_result, &imap, offset,
-                           dax_fault);
+        if (create && direct) {
+            if (dax_fault)
+                ASSERT(!ISUNWRITTEN(&imap));
+            else
+                xfs_map_direct(inode, bh_result, &imap, offset);
+        }
     }
 
     /*
@@ -1570,42 +1518,50 @@ xfs_get_blocks_dax_fault(
     return __xfs_get_blocks(inode, iblock, bh_result, create, true, true);
 }
 
-static void
-__xfs_end_io_direct_write(
-    struct inode        *inode,
-    struct xfs_ioend    *ioend,
+/*
+ * Complete a direct I/O write request.
+ *
+ * xfs_map_direct passes us some flags in the private data to tell us what to
+ * do. If no flags are set, then the write IO is an overwrite wholly within
+ * the existing allocated file size and so there is nothing for us to do.
+ *
+ * Note that in this case the completion can be called in interrupt context,
+ * whereas if we have flags set we will always be called in task context
+ * (i.e. from a workqueue).
+ */
+STATIC int
+xfs_end_io_direct_write(
+    struct kiocb        *iocb,
     loff_t              offset,
-    ssize_t             size)
+    ssize_t             size,
+    void                *private)
 {
-    struct xfs_mount    *mp = XFS_I(inode)->i_mount;
+    struct inode        *inode = file_inode(iocb->ki_filp);
+    struct xfs_inode    *ip = XFS_I(inode);
+    struct xfs_mount    *mp = ip->i_mount;
+    uintptr_t           flags = (uintptr_t)private;
+    int                 error = 0;
 
-    if (XFS_FORCED_SHUTDOWN(mp) || ioend->io_error)
-        goto out_end_io;
+    trace_xfs_end_io_direct_write(ip, offset, size);
 
-    /*
-     * dio completion end_io functions are only called on writes if more
-     * than 0 bytes was written.
-     */
-    ASSERT(size > 0);
+    if (XFS_FORCED_SHUTDOWN(mp))
+        return -EIO;
 
-    /*
-     * The ioend only maps whole blocks, while the IO may be sector aligned.
-     * Hence the ioend offset/size may not match the IO offset/size exactly.
-     * Because we don't map overwrites within EOF into the ioend, the offset
-     * may not match, but only if the endio spans EOF. Either way, write
-     * the IO sizes into the ioend so that completion processing does the
-     * right thing.
-     */
-    ASSERT(offset + size <= ioend->io_offset + ioend->io_size);
-    ioend->io_size = size;
-    ioend->io_offset = offset;
+    if (size <= 0)
+        return size;
 
     /*
-     * The ioend tells us whether we are doing unwritten extent conversion
+     * The flags tell us whether we are doing unwritten extent conversions
      * or an append transaction that updates the on-disk file size. These
      * cases are the only cases where we should *potentially* be needing
      * to update the VFS inode size.
-     *
+     */
+    if (flags == 0) {
+        ASSERT(offset + size <= i_size_read(inode));
+        return 0;
+    }
+
+    /*
      * We need to update the in-core inode size here so that we don't end up
      * with the on-disk inode size being outside the in-core inode size. We
      * have no other method of updating EOF for AIO, so always do it here
@@ -1616,91 +1572,56 @@ __xfs_end_io_direct_write(
      * here can result in EOF moving backwards and Bad Things Happen when
      * that occurs.
      */
-    spin_lock(&XFS_I(inode)->i_flags_lock);
+    spin_lock(&ip->i_flags_lock);
     if (offset + size > i_size_read(inode))
         i_size_write(inode, offset + size);
-    spin_unlock(&XFS_I(inode)->i_flags_lock);
+    spin_unlock(&ip->i_flags_lock);
 
-    /*
-     * If we are doing an append IO that needs to update the EOF on disk,
-     * do the transaction reserve now so we can use common end io
-     * processing. Stashing the error (if there is one) in the ioend will
-     * result in the ioend processing passing on the error if it is
-     * possible as we can't return it from here.
-     */
-    if (ioend->io_type == XFS_IO_OVERWRITE)
-        ioend->io_error = xfs_setfilesize_trans_alloc(ioend);
+    if (flags & XFS_DIO_FLAG_UNWRITTEN) {
+        trace_xfs_end_io_direct_write_unwritten(ip, offset, size);
 
-out_end_io:
-    xfs_end_io(&ioend->io_work);
-    return;
-}
+        error = xfs_iomap_write_unwritten(ip, offset, size);
+    } else if (flags & XFS_DIO_FLAG_APPEND) {
+        struct xfs_trans *tp;
 
-/*
- * Complete a direct I/O write request.
- *
- * The ioend structure is passed from __xfs_get_blocks() to tell us what to do.
- * If no ioend exists (i.e. @private == NULL) then the write IO is an overwrite
- * wholly within the EOF and so there is nothing for us to do. Note that in this
- * case the completion can be called in interrupt context, whereas if we have an
- * ioend we will always be called in task context (i.e. from a workqueue).
- */
-STATIC void
-xfs_end_io_direct_write(
-    struct kiocb        *iocb,
-    loff_t              offset,
-    ssize_t             size,
-    void                *private)
-{
-    struct inode        *inode = file_inode(iocb->ki_filp);
-    struct xfs_ioend    *ioend = private;
+        trace_xfs_end_io_direct_write_append(ip, offset, size);
 
-    trace_xfs_gbmap_direct_endio(XFS_I(inode), offset, size,
-                                 ioend ? ioend->io_type : 0, NULL);
-
-    if (!ioend) {
-        ASSERT(offset + size <= i_size_read(inode));
-        return;
+        tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
+        error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0);
+        if (error) {
+            xfs_trans_cancel(tp);
+            return error;
+        }
+        error = xfs_setfilesize(ip, tp, offset, size);
     }
 
-    __xfs_end_io_direct_write(inode, ioend, offset, size);
+    return error;
 }
 
-static inline ssize_t
-xfs_vm_do_dio(
-    struct inode        *inode,
+STATIC ssize_t
+xfs_vm_direct_IO(
     struct kiocb        *iocb,
     struct iov_iter     *iter,
-    loff_t              offset,
-    void                (*endio)(struct kiocb   *iocb,
-                                 loff_t         offset,
-                                 ssize_t        size,
-                                 void           *private),
-    int                 flags)
+    loff_t              offset)
 {
+    struct inode        *inode = iocb->ki_filp->f_mapping->host;
+    dio_iodone_t        *endio = NULL;
+    int                 flags = 0;
     struct block_device *bdev;
 
-    if (IS_DAX(inode))
+    if (iov_iter_rw(iter) == WRITE) {
+        endio = xfs_end_io_direct_write;
+        flags = DIO_ASYNC_EXTEND;
+    }
+
+    if (IS_DAX(inode)) {
         return dax_do_io(iocb, inode, iter, offset,
                          xfs_get_blocks_direct, endio, 0);
+    }
 
     bdev = xfs_find_bdev_for_inode(inode);
     return __blockdev_direct_IO(iocb, inode, bdev, iter, offset,
             xfs_get_blocks_direct, endio, NULL, flags);
-}
-
-STATIC ssize_t
-xfs_vm_direct_IO(
-    struct kiocb        *iocb,
-    struct iov_iter     *iter,
-    loff_t              offset)
-{
-    struct inode        *inode = iocb->ki_filp->f_mapping->host;
-
-    if (iov_iter_rw(iter) == WRITE)
-        return xfs_vm_do_dio(inode, iocb, iter, offset,
-                             xfs_end_io_direct_write, DIO_ASYNC_EXTEND);
-    return xfs_vm_do_dio(inode, iocb, iter, offset, NULL, 0);
 }
 
 /*
@@ -1783,14 +1704,22 @@ xfs_vm_write_failed(
         if (block_start >= to)
             break;
 
-        if (!buffer_delay(bh))
+        /*
+         * Process delalloc and unwritten buffers beyond EOF. We can
+         * encounter unwritten buffers in the event that a file has
+         * post-EOF unwritten extents and an extending write happens to
+         * fail (e.g., an unaligned write that also involves a delalloc
+         * to the same page).
+         */
+        if (!buffer_delay(bh) && !buffer_unwritten(bh))
             continue;
 
         if (!buffer_new(bh) && block_offset < i_size_read(inode))
             continue;
 
-        xfs_vm_kill_delalloc_range(inode, block_offset,
-                                   block_offset + bh->b_size);
+        if (buffer_delay(bh))
+            xfs_vm_kill_delalloc_range(inode, block_offset,
+                                       block_offset + bh->b_size);
 
         /*
          * This buffer does not contain data anymore. make sure anyone
@@ -1801,6 +1730,7 @@ xfs_vm_write_failed(
         clear_buffer_mapped(bh);
         clear_buffer_new(bh);
         clear_buffer_dirty(bh);
+        clear_buffer_unwritten(bh);
     }
 
 }
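
The xfs_aops.c rewrite replaces per-I/O ioend allocations with two flag bits smuggled into the buffer_head's b_private pointer: a direct write completion carries exactly one private pointer, and for the pure-overwrite case nothing needs to travel at all. The trick in isolation (a self-contained illustration, not kernel code):

    #include <stdint.h>
    #include <stdio.h>

    #define FLAG_UNWRITTEN  (1 << 0)    /* mirrors XFS_DIO_FLAG_UNWRITTEN */
    #define FLAG_APPEND     (1 << 1)    /* mirrors XFS_DIO_FLAG_APPEND */

    int main(void)
    {
        void *b_private = NULL;                 /* the single private slot */
        uintptr_t *flags = (uintptr_t *)&b_private;

        *flags |= FLAG_APPEND;                  /* set at mapping time */

        uintptr_t seen = (uintptr_t)b_private;  /* decoded at completion */
        if (seen == 0)
            printf("pure overwrite: nothing to do\n");
        else if (seen & FLAG_UNWRITTEN)
            printf("convert unwritten extents\n");
        else if (seen & FLAG_APPEND)
            printf("update on-disk file size\n");
        return 0;
    }

This works because the pointer is never dereferenced when used this way; it only ferries the two low bits from xfs_map_direct() to xfs_end_io_direct_write().
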
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 708775613e55..fd7f51c39b3f 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -202,10 +202,12 @@ xfs_bmap_rtalloc(
     ralen = MAXEXTLEN / mp->m_sb.sb_rextsize;
 
     /*
-     * Lock out other modifications to the RT bitmap inode.
+     * Lock out modifications to both the RT bitmap and summary inodes
      */
     xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL);
     xfs_trans_ijoin(ap->tp, mp->m_rbmip, XFS_ILOCK_EXCL);
+    xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL);
+    xfs_trans_ijoin(ap->tp, mp->m_rsumip, XFS_ILOCK_EXCL);
 
     /*
      * If it's an allocation to an empty file at offset 0,
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 9c44d38dcd1f..316b2a1bdba5 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -92,26 +92,28 @@ xfs_qm_adjust_dqlimits(
 {
     struct xfs_quotainfo    *q = mp->m_quotainfo;
     struct xfs_disk_dquot   *d = &dq->q_core;
+    struct xfs_def_quota    *defq;
     int                     prealloc = 0;
 
     ASSERT(d->d_id);
+    defq = xfs_get_defquota(dq, q);
 
-    if (q->qi_bsoftlimit && !d->d_blk_softlimit) {
-        d->d_blk_softlimit = cpu_to_be64(q->qi_bsoftlimit);
+    if (defq->bsoftlimit && !d->d_blk_softlimit) {
+        d->d_blk_softlimit = cpu_to_be64(defq->bsoftlimit);
         prealloc = 1;
     }
-    if (q->qi_bhardlimit && !d->d_blk_hardlimit) {
-        d->d_blk_hardlimit = cpu_to_be64(q->qi_bhardlimit);
+    if (defq->bhardlimit && !d->d_blk_hardlimit) {
+        d->d_blk_hardlimit = cpu_to_be64(defq->bhardlimit);
         prealloc = 1;
     }
-    if (q->qi_isoftlimit && !d->d_ino_softlimit)
-        d->d_ino_softlimit = cpu_to_be64(q->qi_isoftlimit);
-    if (q->qi_ihardlimit && !d->d_ino_hardlimit)
-        d->d_ino_hardlimit = cpu_to_be64(q->qi_ihardlimit);
-    if (q->qi_rtbsoftlimit && !d->d_rtb_softlimit)
-        d->d_rtb_softlimit = cpu_to_be64(q->qi_rtbsoftlimit);
-    if (q->qi_rtbhardlimit && !d->d_rtb_hardlimit)
-        d->d_rtb_hardlimit = cpu_to_be64(q->qi_rtbhardlimit);
+    if (defq->isoftlimit && !d->d_ino_softlimit)
+        d->d_ino_softlimit = cpu_to_be64(defq->isoftlimit);
+    if (defq->ihardlimit && !d->d_ino_hardlimit)
+        d->d_ino_hardlimit = cpu_to_be64(defq->ihardlimit);
+    if (defq->rtbsoftlimit && !d->d_rtb_softlimit)
+        d->d_rtb_softlimit = cpu_to_be64(defq->rtbsoftlimit);
+    if (defq->rtbhardlimit && !d->d_rtb_hardlimit)
+        d->d_rtb_hardlimit = cpu_to_be64(defq->rtbhardlimit);
 
     if (prealloc)
         xfs_dquot_set_prealloc_limits(dq);
@@ -232,7 +234,8 @@ xfs_qm_init_dquot_blk(
 {
     struct xfs_quotainfo    *q = mp->m_quotainfo;
     xfs_dqblk_t             *d;
-    int                     curid, i;
+    xfs_dqid_t              curid;
+    int                     i;
 
     ASSERT(tp);
     ASSERT(xfs_buf_islocked(bp));
@@ -243,7 +246,6 @@ xfs_qm_init_dquot_blk(
      * ID of the first dquot in the block - id's are zero based.
      */
     curid = id - (id % q->qi_dqperchunk);
-    ASSERT(curid >= 0);
     memset(d, 0, BBTOB(q->qi_dqchunklen));
     for (i = 0; i < q->qi_dqperchunk; i++, d++, curid++) {
         d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC);
@@ -464,12 +466,13 @@ xfs_qm_dqtobp(
     struct xfs_bmbt_irec    map;
     int                     nmaps = 1, error;
     struct xfs_buf          *bp;
-    struct xfs_inode        *quotip = xfs_dq_to_quota_inode(dqp);
+    struct xfs_inode        *quotip;
     struct xfs_mount        *mp = dqp->q_mount;
     xfs_dqid_t              id = be32_to_cpu(dqp->q_core.d_id);
     struct xfs_trans        *tp = (tpp ? *tpp : NULL);
     uint                    lock_mode;
 
+    quotip = xfs_quota_inode(dqp->q_mount, dqp->dq_flags);
     dqp->q_fileoffset = (xfs_fileoff_t)id / mp->m_quotainfo->qi_dqperchunk;
 
     lock_mode = xfs_ilock_data_map_shared(quotip);
@@ -685,6 +688,56 @@ error0:
 }
 
 /*
+ * Advance to the next id in the current chunk, or if at the
+ * end of the chunk, skip ahead to first id in next allocated chunk
+ * using the SEEK_DATA interface.
+ */
+int
+xfs_dq_get_next_id(
+    xfs_mount_t         *mp,
+    uint                type,
+    xfs_dqid_t          *id,
+    loff_t              eof)
+{
+    struct xfs_inode    *quotip;
+    xfs_fsblock_t       start;
+    loff_t              offset;
+    uint                lock;
+    xfs_dqid_t          next_id;
+    int                 error = 0;
+
+    /* Simple advance */
+    next_id = *id + 1;
+
+    /* If new ID is within the current chunk, advancing it sufficed */
+    if (next_id % mp->m_quotainfo->qi_dqperchunk) {
+        *id = next_id;
+        return 0;
+    }
+
+    /* Nope, next_id is now past the current chunk, so find the next one */
+    start = (xfs_fsblock_t)next_id / mp->m_quotainfo->qi_dqperchunk;
+
+    quotip = xfs_quota_inode(mp, type);
+    lock = xfs_ilock_data_map_shared(quotip);
+
+    offset = __xfs_seek_hole_data(VFS_I(quotip), XFS_FSB_TO_B(mp, start),
+                                  eof, SEEK_DATA);
+    if (offset < 0)
+        error = offset;
+
+    xfs_iunlock(quotip, lock);
+
+    /* -ENXIO is essentially "no more data" */
+    if (error)
+        return (error == -ENXIO ? -ENOENT: error);
+
+    /* Convert next data offset back to a quota id */
+    *id = XFS_B_TO_FSB(mp, offset) * mp->m_quotainfo->qi_dqperchunk;
+    return 0;
+}
+
+/*
  * Given the file system, inode OR id, and type (UDQUOT/GDQUOT), return a
  * a locked dquot, doing an allocation (if requested) as needed.
  * When both an inode and an id are given, the inode's id takes precedence.
@@ -704,6 +757,7 @@ xfs_qm_dqget(
     struct xfs_quotainfo    *qi = mp->m_quotainfo;
     struct radix_tree_root  *tree = xfs_dquot_tree(qi, type);
     struct xfs_dquot        *dqp;
+    loff_t                  eof = 0;
     int                     error;
 
     ASSERT(XFS_IS_QUOTA_RUNNING(mp));
@@ -731,6 +785,21 @@ xfs_qm_dqget(
     }
 #endif
 
+    /* Get the end of the quota file if we need it */
+    if (flags & XFS_QMOPT_DQNEXT) {
+        struct xfs_inode    *quotip;
+        xfs_fileoff_t       last;
+        uint                lock_mode;
+
+        quotip = xfs_quota_inode(mp, type);
+        lock_mode = xfs_ilock_data_map_shared(quotip);
+        error = xfs_bmap_last_offset(quotip, &last, XFS_DATA_FORK);
+        xfs_iunlock(quotip, lock_mode);
+        if (error)
+            return error;
+        eof = XFS_FSB_TO_B(mp, last);
+    }
+
 restart:
     mutex_lock(&qi->qi_tree_lock);
     dqp = radix_tree_lookup(tree, id);
@@ -744,6 +813,18 @@ restart:
         goto restart;
     }
 
+    /* uninit / unused quota found in radix tree, keep looking */
+    if (flags & XFS_QMOPT_DQNEXT) {
+        if (XFS_IS_DQUOT_UNINITIALIZED(dqp)) {
+            xfs_dqunlock(dqp);
+            mutex_unlock(&qi->qi_tree_lock);
+            error = xfs_dq_get_next_id(mp, type, &id, eof);
+            if (error)
+                return error;
+            goto restart;
+        }
+    }
+
     dqp->q_nrefs++;
     mutex_unlock(&qi->qi_tree_lock);
 
@@ -770,6 +851,13 @@ restart:
     if (ip)
         xfs_ilock(ip, XFS_ILOCK_EXCL);
 
+    /* If we are asked to find next active id, keep looking */
+    if (error == -ENOENT && (flags & XFS_QMOPT_DQNEXT)) {
+        error = xfs_dq_get_next_id(mp, type, &id, eof);
+        if (!error)
+            goto restart;
+    }
+
     if (error)
         return error;
 
@@ -820,6 +908,17 @@ restart:
     qi->qi_dquots++;
     mutex_unlock(&qi->qi_tree_lock);
 
+    /* If we are asked to find next active id, keep looking */
+    if (flags & XFS_QMOPT_DQNEXT) {
+        if (XFS_IS_DQUOT_UNINITIALIZED(dqp)) {
+            xfs_qm_dqput(dqp);
+            error = xfs_dq_get_next_id(mp, type, &id, eof);
+            if (error)
+                return error;
+            goto restart;
+        }
+    }
+
 dqret:
     ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL));
     trace_xfs_dqget_miss(dqp);
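
xfs_dq_get_next_id() leans on the quota file's linear layout: dquots are packed qi_dqperchunk per filesystem block, so quota id N always lives in block N / qi_dqperchunk, and a hole in the file means a run of ids with no dquots at all. A worked example of the advance, assuming a hypothetical 42 dquots per block:

    /* current id 83 sits in chunk 1 (ids 42..83) */
    next_id = 83 + 1;       /* 84: 84 % 42 == 0, chunk boundary crossed */
    start   = 84 / 42;      /* = fs block 2 of the quota file */
    /* SEEK_DATA from block 2's byte offset; suppose data starts in block 5 */
    *id = 5 * 42;           /* = 210, first id of that allocated chunk */

Any next_id with a nonzero remainder stays inside an allocated chunk and returns immediately; only a chunk crossing pays for the SEEK_DATA lookup, and -ENXIO from the seek is mapped to -ENOENT ("no more quotas").
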
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index f7333fbba5c2..ac0fd32de31e 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1337,31 +1337,31 @@ out:
     return found;
 }
 
-STATIC loff_t
-xfs_seek_hole_data(
-    struct file         *file,
+/*
+ * caller must lock inode with xfs_ilock_data_map_shared,
+ * can we craft an appropriate ASSERT?
+ *
+ * end is because the VFS-level lseek interface is defined such that any
+ * offset past i_size shall return -ENXIO, but we use this for quota code
+ * which does not maintain i_size, and we want to SEEK_DATA past i_size.
+ */
+loff_t
+__xfs_seek_hole_data(
+    struct inode        *inode,
     loff_t              start,
+    loff_t              end,
     int                 whence)
 {
-    struct inode        *inode = file->f_mapping->host;
     struct xfs_inode    *ip = XFS_I(inode);
     struct xfs_mount    *mp = ip->i_mount;
     loff_t              uninitialized_var(offset);
-    xfs_fsize_t         isize;
     xfs_fileoff_t       fsbno;
-    xfs_filblks_t       end;
-    uint                lock;
+    xfs_filblks_t       lastbno;
     int                 error;
 
-    if (XFS_FORCED_SHUTDOWN(mp))
-        return -EIO;
-
-    lock = xfs_ilock_data_map_shared(ip);
-
-    isize = i_size_read(inode);
-    if (start >= isize) {
+    if (start >= end) {
         error = -ENXIO;
-        goto out_unlock;
+        goto out_error;
     }
 
     /*
@@ -1369,22 +1369,22 @@ xfs_seek_hole_data(
      * by fsbno to the end block of the file.
      */
     fsbno = XFS_B_TO_FSBT(mp, start);
-    end = XFS_B_TO_FSB(mp, isize);
+    lastbno = XFS_B_TO_FSB(mp, end);
 
     for (;;) {
         struct xfs_bmbt_irec    map[2];
         int                     nmap = 2;
         unsigned int            i;
 
-        error = xfs_bmapi_read(ip, fsbno, end - fsbno, map, &nmap,
+        error = xfs_bmapi_read(ip, fsbno, lastbno - fsbno, map, &nmap,
                                XFS_BMAPI_ENTIRE);
         if (error)
-            goto out_unlock;
+            goto out_error;
 
         /* No extents at given offset, must be beyond EOF */
         if (nmap == 0) {
             error = -ENXIO;
-            goto out_unlock;
+            goto out_error;
         }
 
         for (i = 0; i < nmap; i++) {
@@ -1426,7 +1426,7 @@ xfs_seek_hole_data(
              * hole at the end of any file).
              */
             if (whence == SEEK_HOLE) {
-                offset = isize;
+                offset = end;
                 break;
             }
             /*
@@ -1434,7 +1434,7 @@ xfs_seek_hole_data(
              */
             ASSERT(whence == SEEK_DATA);
             error = -ENXIO;
-            goto out_unlock;
+            goto out_error;
         }
 
         ASSERT(i > 1);
@@ -1445,14 +1445,14 @@ xfs_seek_hole_data(
          */
         fsbno = map[i - 1].br_startoff + map[i - 1].br_blockcount;
         start = XFS_FSB_TO_B(mp, fsbno);
-        if (start >= isize) {
+        if (start >= end) {
             if (whence == SEEK_HOLE) {
-                offset = isize;
+                offset = end;
                 break;
             }
             ASSERT(whence == SEEK_DATA);
             error = -ENXIO;
-            goto out_unlock;
+            goto out_error;
         }
     }
 
@@ -1464,7 +1464,39 @@ out:
1464 * situation in particular. 1464 * situation in particular.
1465 */ 1465 */
1466 if (whence == SEEK_HOLE) 1466 if (whence == SEEK_HOLE)
1467 offset = min_t(loff_t, offset, isize); 1467 offset = min_t(loff_t, offset, end);
1468
1469 return offset;
1470
1471out_error:
1472 return error;
1473}
1474
1475STATIC loff_t
1476xfs_seek_hole_data(
1477 struct file *file,
1478 loff_t start,
1479 int whence)
1480{
1481 struct inode *inode = file->f_mapping->host;
1482 struct xfs_inode *ip = XFS_I(inode);
1483 struct xfs_mount *mp = ip->i_mount;
1484 uint lock;
1485 loff_t offset, end;
1486 int error = 0;
1487
1488 if (XFS_FORCED_SHUTDOWN(mp))
1489 return -EIO;
1490
1491 lock = xfs_ilock_data_map_shared(ip);
1492
1493 end = i_size_read(inode);
1494 offset = __xfs_seek_hole_data(inode, start, end, whence);
1495 if (offset < 0) {
1496 error = offset;
1497 goto out_unlock;
1498 }
1499
1468 offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes); 1500 offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
1469 1501
1470out_unlock: 1502out_unlock:
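
The split above leaves __xfs_seek_hole_data() with no dependency on a struct file or on i_size, so a caller that takes the ILOCK itself can scan an inode that keeps no meaningful i_size. A minimal caller sketch, assuming only the locking rule stated in the comment above (sketch_next_data is a hypothetical name, not part of this patch):

static loff_t
sketch_next_data(
	struct xfs_inode	*ip,
	loff_t			start,
	loff_t			end)
{
	uint			lock;
	loff_t			offset;

	lock = xfs_ilock_data_map_shared(ip);	/* locking rule from the comment */
	offset = __xfs_seek_hole_data(VFS_I(ip), start, end, SEEK_DATA);
	xfs_iunlock(ip, lock);

	return offset;		/* negative return is an error, e.g. -ENXIO */
}
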
diff --git a/fs/xfs/xfs_fsops.h b/fs/xfs/xfs_fsops.h
index 1b6a98b66886..f32713f14f9a 100644
--- a/fs/xfs/xfs_fsops.h
+++ b/fs/xfs/xfs_fsops.h
@@ -25,6 +25,5 @@ extern int xfs_fs_counts(xfs_mount_t *mp, xfs_fsop_counts_t *cnt);
25extern int xfs_reserve_blocks(xfs_mount_t *mp, __uint64_t *inval, 25extern int xfs_reserve_blocks(xfs_mount_t *mp, __uint64_t *inval,
26 xfs_fsop_resblks_t *outval); 26 xfs_fsop_resblks_t *outval);
27extern int xfs_fs_goingdown(xfs_mount_t *mp, __uint32_t inflags); 27extern int xfs_fs_goingdown(xfs_mount_t *mp, __uint32_t inflags);
28extern int xfs_fs_log_dummy(struct xfs_mount *mp);
29 28
30#endif /* __XFS_FSOPS_H__ */ 29#endif /* __XFS_FSOPS_H__ */
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index d6277494e606..43e1d51b15eb 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -435,6 +435,8 @@ int xfs_update_prealloc_flags(struct xfs_inode *ip,
435int xfs_zero_eof(struct xfs_inode *ip, xfs_off_t offset, 435int xfs_zero_eof(struct xfs_inode *ip, xfs_off_t offset,
436 xfs_fsize_t isize, bool *did_zeroing); 436 xfs_fsize_t isize, bool *did_zeroing);
437int xfs_iozero(struct xfs_inode *ip, loff_t pos, size_t count); 437int xfs_iozero(struct xfs_inode *ip, loff_t pos, size_t count);
438loff_t __xfs_seek_hole_data(struct inode *inode, loff_t start,
439 loff_t eof, int whence);
438 440
439 441
440/* from xfs_iops.c */ 442/* from xfs_iops.c */
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index bd6f23b952a5..e776594889c3 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1109,27 +1109,10 @@ xlog_verify_head(
1109 bool tmp_wrapped; 1109 bool tmp_wrapped;
1110 1110
1111 /* 1111 /*
1112 * Search backwards through the log looking for the log record header 1112 * Check the head of the log for torn writes. Search backwards from the
1113 * block. This wraps all the way back around to the head so something is 1113 * head until we hit the tail or the maximum number of log record I/Os
1114 * seriously wrong if we can't find it. 1114 * that could have been in flight at one time. Use a temporary buffer so
1115 */ 1115 * we don't trash the rhead/bp pointers from the caller.
1116 found = xlog_rseek_logrec_hdr(log, *head_blk, *head_blk, 1, bp, rhead_blk,
1117 rhead, wrapped);
1118 if (found < 0)
1119 return found;
1120 if (!found) {
1121 xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__);
1122 return -EIO;
1123 }
1124
1125 *tail_blk = BLOCK_LSN(be64_to_cpu((*rhead)->h_tail_lsn));
1126
1127 /*
1128 * Now that we have a tail block, check the head of the log for torn
1129 * writes. Search again until we hit the tail or the maximum number of
1130 * log record I/Os that could have been in flight at one time. Use a
1131 * temporary buffer so we don't trash the rhead/bp pointer from the
1132 * call above.
1133 */ 1116 */
1134 tmp_bp = xlog_get_bp(log, 1); 1117 tmp_bp = xlog_get_bp(log, 1);
1135 if (!tmp_bp) 1118 if (!tmp_bp)
@@ -1216,6 +1199,115 @@ xlog_verify_head(
1216} 1199}
1217 1200
1218/* 1201/*
1202 * Check whether the head of the log points to an unmount record. In other
1203 * words, determine whether the log is clean. If so, update the in-core state
1204 * appropriately.
1205 */
1206static int
1207xlog_check_unmount_rec(
1208 struct xlog *log,
1209 xfs_daddr_t *head_blk,
1210 xfs_daddr_t *tail_blk,
1211 struct xlog_rec_header *rhead,
1212 xfs_daddr_t rhead_blk,
1213 struct xfs_buf *bp,
1214 bool *clean)
1215{
1216 struct xlog_op_header *op_head;
1217 xfs_daddr_t umount_data_blk;
1218 xfs_daddr_t after_umount_blk;
1219 int hblks;
1220 int error;
1221 char *offset;
1222
1223 *clean = false;
1224
1225 /*
1226 * Look for unmount record. If we find it, then we know there was a
1227 * clean unmount. Since the record header could be the last block in the
1228 * physical log, we convert to a log block before comparing to the head_blk.
1229 *
1230 * Save the current tail lsn to pass to xlog_clear_stale_blocks()
1231 * below. We won't want to clear the unmount record if there is one, so
1232 * we pass the lsn of the unmount record rather than the block after it.
1233 */
1234 if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
1235 int h_size = be32_to_cpu(rhead->h_size);
1236 int h_version = be32_to_cpu(rhead->h_version);
1237
1238 if ((h_version & XLOG_VERSION_2) &&
1239 (h_size > XLOG_HEADER_CYCLE_SIZE)) {
1240 hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
1241 if (h_size % XLOG_HEADER_CYCLE_SIZE)
1242 hblks++;
1243 } else {
1244 hblks = 1;
1245 }
1246 } else {
1247 hblks = 1;
1248 }
1249 after_umount_blk = rhead_blk + hblks + BTOBB(be32_to_cpu(rhead->h_len));
1250 after_umount_blk = do_mod(after_umount_blk, log->l_logBBsize);
1251 if (*head_blk == after_umount_blk &&
1252 be32_to_cpu(rhead->h_num_logops) == 1) {
1253 umount_data_blk = rhead_blk + hblks;
1254 umount_data_blk = do_mod(umount_data_blk, log->l_logBBsize);
1255 error = xlog_bread(log, umount_data_blk, 1, bp, &offset);
1256 if (error)
1257 return error;
1258
1259 op_head = (struct xlog_op_header *)offset;
1260 if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) {
1261 /*
1262 * Set tail and last sync so that newly written log
1263 * records will point recovery to after the current
1264 * unmount record.
1265 */
1266 xlog_assign_atomic_lsn(&log->l_tail_lsn,
1267 log->l_curr_cycle, after_umount_blk);
1268 xlog_assign_atomic_lsn(&log->l_last_sync_lsn,
1269 log->l_curr_cycle, after_umount_blk);
1270 *tail_blk = after_umount_blk;
1271
1272 *clean = true;
1273 }
1274 }
1275
1276 return 0;
1277}
1278
1279static void
1280xlog_set_state(
1281 struct xlog *log,
1282 xfs_daddr_t head_blk,
1283 struct xlog_rec_header *rhead,
1284 xfs_daddr_t rhead_blk,
1285 bool bump_cycle)
1286{
1287 /*
1288 * Reset log values according to the state of the log when we
1289 * crashed. In the case where head_blk == 0, we bump curr_cycle
1290 * one because the next write starts a new cycle rather than
1291 * continuing the cycle of the last good log record. At this
1292 * point we have guaranteed that all partial log records have been
1293 * accounted for. Therefore, we know that the last good log record
1294 * written was complete and ended exactly on the end boundary
1295 * of the physical log.
1296 */
1297 log->l_prev_block = rhead_blk;
1298 log->l_curr_block = (int)head_blk;
1299 log->l_curr_cycle = be32_to_cpu(rhead->h_cycle);
1300 if (bump_cycle)
1301 log->l_curr_cycle++;
1302 atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn));
1303 atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn));
1304 xlog_assign_grant_head(&log->l_reserve_head.grant, log->l_curr_cycle,
1305 BBTOB(log->l_curr_block));
1306 xlog_assign_grant_head(&log->l_write_head.grant, log->l_curr_cycle,
1307 BBTOB(log->l_curr_block));
1308}
1309
1310/*
1219 * Find the sync block number or the tail of the log. 1311 * Find the sync block number or the tail of the log.
1220 * 1312 *
1221 * This will be the block number of the last record to have its 1313 * This will be the block number of the last record to have its
@@ -1238,22 +1330,20 @@ xlog_find_tail(
1238 xfs_daddr_t *tail_blk) 1330 xfs_daddr_t *tail_blk)
1239{ 1331{
1240 xlog_rec_header_t *rhead; 1332 xlog_rec_header_t *rhead;
1241 xlog_op_header_t *op_head;
1242 char *offset = NULL; 1333 char *offset = NULL;
1243 xfs_buf_t *bp; 1334 xfs_buf_t *bp;
1244 int error; 1335 int error;
1245 xfs_daddr_t umount_data_blk;
1246 xfs_daddr_t after_umount_blk;
1247 xfs_daddr_t rhead_blk; 1336 xfs_daddr_t rhead_blk;
1248 xfs_lsn_t tail_lsn; 1337 xfs_lsn_t tail_lsn;
1249 int hblks;
1250 bool wrapped = false; 1338 bool wrapped = false;
1339 bool clean = false;
1251 1340
1252 /* 1341 /*
1253 * Find previous log record 1342 * Find previous log record
1254 */ 1343 */
1255 if ((error = xlog_find_head(log, head_blk))) 1344 if ((error = xlog_find_head(log, head_blk)))
1256 return error; 1345 return error;
1346 ASSERT(*head_blk < INT_MAX);
1257 1347
1258 bp = xlog_get_bp(log, 1); 1348 bp = xlog_get_bp(log, 1);
1259 if (!bp) 1349 if (!bp)
@@ -1271,100 +1361,75 @@ xlog_find_tail(
1271 } 1361 }
1272 1362
1273 /* 1363 /*
1274 * Trim the head block back to skip over torn records. We can have 1364 * Search backwards through the log looking for the log record header
1275 * multiple log I/Os in flight at any time, so we assume CRC failures 1365 * block. This wraps all the way back around to the head so something is
1276 * back through the previous several records are torn writes and skip 1366 * seriously wrong if we can't find it.
1277 * them.
1278 */ 1367 */
1279 ASSERT(*head_blk < INT_MAX); 1368 error = xlog_rseek_logrec_hdr(log, *head_blk, *head_blk, 1, bp,
1280 error = xlog_verify_head(log, head_blk, tail_blk, bp, &rhead_blk, 1369 &rhead_blk, &rhead, &wrapped);
1281 &rhead, &wrapped); 1370 if (error < 0)
1282 if (error) 1371 return error;
1283 goto done; 1372 if (!error) {
1373 xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__);
1374 return -EIO;
1375 }
1376 *tail_blk = BLOCK_LSN(be64_to_cpu(rhead->h_tail_lsn));
1284 1377
1285 /* 1378 /*
1286 * Reset log values according to the state of the log when we 1379 * Set the log state based on the current head record.
1287 * crashed. In the case where head_blk == 0, we bump curr_cycle
1288 * one because the next write starts a new cycle rather than
1289 * continuing the cycle of the last good log record. At this
1290 * point we have guaranteed that all partial log records have been
1291 * accounted for. Therefore, we know that the last good log record
1292 * written was complete and ended exactly on the end boundary
1293 * of the physical log.
1294 */ 1380 */
1295 log->l_prev_block = rhead_blk; 1381 xlog_set_state(log, *head_blk, rhead, rhead_blk, wrapped);
1296 log->l_curr_block = (int)*head_blk; 1382 tail_lsn = atomic64_read(&log->l_tail_lsn);
1297 log->l_curr_cycle = be32_to_cpu(rhead->h_cycle);
1298 if (wrapped)
1299 log->l_curr_cycle++;
1300 atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn));
1301 atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn));
1302 xlog_assign_grant_head(&log->l_reserve_head.grant, log->l_curr_cycle,
1303 BBTOB(log->l_curr_block));
1304 xlog_assign_grant_head(&log->l_write_head.grant, log->l_curr_cycle,
1305 BBTOB(log->l_curr_block));
1306 1383
1307 /* 1384 /*
1308 * Look for unmount record. If we find it, then we know there 1385 * Look for an unmount record at the head of the log. This sets the log
1309 * was a clean unmount. Since 'i' could be the last block in 1386 * state to determine whether recovery is necessary.
1310 * the physical log, we convert to a log block before comparing 1387 */
1311 * to the head_blk. 1388 error = xlog_check_unmount_rec(log, head_blk, tail_blk, rhead,
1389 rhead_blk, bp, &clean);
1390 if (error)
1391 goto done;
1392
1393 /*
1394 * Verify the log head if the log is not clean (e.g., we have anything
1395 * but an unmount record at the head). This uses CRC verification to
1396 * detect torn writes; any CRC failures discovered are treated as torn
1397 * writes and the log head is trimmed accordingly.
1312 * 1398 *
1313 * Save the current tail lsn to use to pass to 1399 * Note that we can only run CRC verification when the log is dirty
1314 * xlog_clear_stale_blocks() below. We won't want to clear the 1400 * because there's no guarantee that the log data behind an unmount
1315 * unmount record if there is one, so we pass the lsn of the 1401 * record is compatible with the current architecture.
1316 * unmount record rather than the block after it.
1317 */ 1402 */
1318 if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { 1403 if (!clean) {
1319 int h_size = be32_to_cpu(rhead->h_size); 1404 xfs_daddr_t orig_head = *head_blk;
1320 int h_version = be32_to_cpu(rhead->h_version);
1321 1405
1322 if ((h_version & XLOG_VERSION_2) && 1406 error = xlog_verify_head(log, head_blk, tail_blk, bp,
1323 (h_size > XLOG_HEADER_CYCLE_SIZE)) { 1407 &rhead_blk, &rhead, &wrapped);
1324 hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
1325 if (h_size % XLOG_HEADER_CYCLE_SIZE)
1326 hblks++;
1327 } else {
1328 hblks = 1;
1329 }
1330 } else {
1331 hblks = 1;
1332 }
1333 after_umount_blk = rhead_blk + hblks + BTOBB(be32_to_cpu(rhead->h_len));
1334 after_umount_blk = do_mod(after_umount_blk, log->l_logBBsize);
1335 tail_lsn = atomic64_read(&log->l_tail_lsn);
1336 if (*head_blk == after_umount_blk &&
1337 be32_to_cpu(rhead->h_num_logops) == 1) {
1338 umount_data_blk = rhead_blk + hblks;
1339 umount_data_blk = do_mod(umount_data_blk, log->l_logBBsize);
1340 error = xlog_bread(log, umount_data_blk, 1, bp, &offset);
1341 if (error) 1408 if (error)
1342 goto done; 1409 goto done;
1343 1410
1344 op_head = (xlog_op_header_t *)offset; 1411 /* update in-core state again if the head changed */
1345 if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) { 1412 if (*head_blk != orig_head) {
1346 /* 1413 xlog_set_state(log, *head_blk, rhead, rhead_blk,
1347 * Set tail and last sync so that newly written 1414 wrapped);
1348 * log records will point recovery to after the 1415 tail_lsn = atomic64_read(&log->l_tail_lsn);
1349 * current unmount record. 1416 error = xlog_check_unmount_rec(log, head_blk, tail_blk,
1350 */ 1417 rhead, rhead_blk, bp,
1351 xlog_assign_atomic_lsn(&log->l_tail_lsn, 1418 &clean);
1352 log->l_curr_cycle, after_umount_blk); 1419 if (error)
1353 xlog_assign_atomic_lsn(&log->l_last_sync_lsn, 1420 goto done;
1354 log->l_curr_cycle, after_umount_blk);
1355 *tail_blk = after_umount_blk;
1356
1357 /*
1358 * Note that the unmount was clean. If the unmount
1359 * was not clean, we need to know this to rebuild the
1360 * superblock counters from the perag headers if we
1361 * have a filesystem using non-persistent counters.
1362 */
1363 log->l_mp->m_flags |= XFS_MOUNT_WAS_CLEAN;
1364 } 1421 }
1365 } 1422 }
1366 1423
1367 /* 1424 /*
1425 * Note that the unmount was clean. If the unmount was not clean, we
1426 * need to know this to rebuild the superblock counters from the perag
1427 * headers if we have a filesystem using non-persistent counters.
1428 */
1429 if (clean)
1430 log->l_mp->m_flags |= XFS_MOUNT_WAS_CLEAN;
1431
1432 /*
1368 * Make sure that there are no blocks in front of the head 1433 * Make sure that there are no blocks in front of the head
1369 * with the same cycle number as the head. This can happen 1434 * with the same cycle number as the head. This can happen
1370 * because we allow multiple outstanding log writes concurrently, 1435 * because we allow multiple outstanding log writes concurrently,
@@ -2473,6 +2538,13 @@ xlog_recover_validate_buf_type(
2473 } 2538 }
2474 bp->b_ops = &xfs_sb_buf_ops; 2539 bp->b_ops = &xfs_sb_buf_ops;
2475 break; 2540 break;
2541#ifdef CONFIG_XFS_RT
2542 case XFS_BLFT_RTBITMAP_BUF:
2543 case XFS_BLFT_RTSUMMARY_BUF:
2544 /* no magic numbers for verification of RT buffers */
2545 bp->b_ops = &xfs_rtbuf_ops;
2546 break;
2547#endif /* CONFIG_XFS_RT */
2476 default: 2548 default:
2477 xfs_warn(mp, "Unknown buffer type %d!", 2549 xfs_warn(mp, "Unknown buffer type %d!",
2478 xfs_blft_from_flags(buf_f)); 2550 xfs_blft_from_flags(buf_f));
@@ -4491,7 +4563,7 @@ xlog_recover_process(
4491 * know precisely what failed. 4563 * know precisely what failed.
4492 */ 4564 */
4493 if (pass == XLOG_RECOVER_CRCPASS) { 4565 if (pass == XLOG_RECOVER_CRCPASS) {
4494 if (rhead->h_crc && crc != le32_to_cpu(rhead->h_crc)) 4566 if (rhead->h_crc && crc != rhead->h_crc)
4495 return -EFSBADCRC; 4567 return -EFSBADCRC;
4496 return 0; 4568 return 0;
4497 } 4569 }
@@ -4502,7 +4574,7 @@ xlog_recover_process(
4502 * zero CRC check prevents warnings from being emitted when upgrading 4574 * zero CRC check prevents warnings from being emitted when upgrading
4503 * the kernel from one that does not add CRCs by default. 4575 * the kernel from one that does not add CRCs by default.
4504 */ 4576 */
4505 if (crc != le32_to_cpu(rhead->h_crc)) { 4577 if (crc != rhead->h_crc) {
4506 if (rhead->h_crc || xfs_sb_version_hascrc(&log->l_mp->m_sb)) { 4578 if (rhead->h_crc || xfs_sb_version_hascrc(&log->l_mp->m_sb)) {
4507 xfs_alert(log->l_mp, 4579 xfs_alert(log->l_mp,
4508 "log record CRC mismatch: found 0x%x, expected 0x%x.", 4580 "log record CRC mismatch: found 0x%x, expected 0x%x.",
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index b57098481c10..a4e03ab50342 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -327,7 +327,6 @@ extern int xfs_mod_fdblocks(struct xfs_mount *mp, int64_t delta,
327 bool reserved); 327 bool reserved);
328extern int xfs_mod_frextents(struct xfs_mount *mp, int64_t delta); 328extern int xfs_mod_frextents(struct xfs_mount *mp, int64_t delta);
329 329
330extern int xfs_mount_log_sb(xfs_mount_t *);
331extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int); 330extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int);
332extern int xfs_readsb(xfs_mount_t *, int); 331extern int xfs_readsb(xfs_mount_t *, int);
333extern void xfs_freesb(xfs_mount_t *); 332extern void xfs_freesb(xfs_mount_t *);
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 532ab79d38fe..be125e1758c1 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -560,6 +560,37 @@ xfs_qm_shrink_count(
560 return list_lru_shrink_count(&qi->qi_lru, sc); 560 return list_lru_shrink_count(&qi->qi_lru, sc);
561} 561}
562 562
563STATIC void
564xfs_qm_set_defquota(
565 xfs_mount_t *mp,
566 uint type,
567 xfs_quotainfo_t *qinf)
568{
569 xfs_dquot_t *dqp;
570 struct xfs_def_quota *defq;
571 int error;
572
573 error = xfs_qm_dqread(mp, 0, type, XFS_QMOPT_DOWARN, &dqp);
574
575 if (!error) {
576 xfs_disk_dquot_t *ddqp = &dqp->q_core;
577
578 defq = xfs_get_defquota(dqp, qinf);
579
580 /*
581 * Timers and warnings have already been set; just set the
582 * default limits for this quota type.
583 */
584 defq->bhardlimit = be64_to_cpu(ddqp->d_blk_hardlimit);
585 defq->bsoftlimit = be64_to_cpu(ddqp->d_blk_softlimit);
586 defq->ihardlimit = be64_to_cpu(ddqp->d_ino_hardlimit);
587 defq->isoftlimit = be64_to_cpu(ddqp->d_ino_softlimit);
588 defq->rtbhardlimit = be64_to_cpu(ddqp->d_rtb_hardlimit);
589 defq->rtbsoftlimit = be64_to_cpu(ddqp->d_rtb_softlimit);
590 xfs_qm_dqdestroy(dqp);
591 }
592}
593
563/* 594/*
564 * This initializes all the quota information that's kept in the 595 * This initializes all the quota information that's kept in the
565 * mount structure 596 * mount structure
@@ -606,19 +637,19 @@ xfs_qm_init_quotainfo(
606 * We try to get the limits from the superuser's limits fields. 637 * We try to get the limits from the superuser's limits fields.
607 * This is quite hacky, but it is standard quota practice. 638 * This is quite hacky, but it is standard quota practice.
608 * 639 *
609 * We look at the USR dquot with id == 0 first, but if user quotas
610 * are not enabled we goto the GRP dquot with id == 0.
611 * We don't really care to keep separate default limits for user
612 * and group quotas, at least not at this point.
613 *
614 * Since we may not have done a quotacheck by this point, just read 640 * Since we may not have done a quotacheck by this point, just read
615 * the dquot without attaching it to any hashtables or lists. 641 * the dquot without attaching it to any hashtables or lists.
642 *
643 * Timers and warnings are globally set by the first timer found in
644 * user/group/proj quota types, otherwise a default value is used.
645 * This should be split into different fields per quota type.
616 */ 646 */
617 error = xfs_qm_dqread(mp, 0, 647 error = xfs_qm_dqread(mp, 0,
618 XFS_IS_UQUOTA_RUNNING(mp) ? XFS_DQ_USER : 648 XFS_IS_UQUOTA_RUNNING(mp) ? XFS_DQ_USER :
619 (XFS_IS_GQUOTA_RUNNING(mp) ? XFS_DQ_GROUP : 649 (XFS_IS_GQUOTA_RUNNING(mp) ? XFS_DQ_GROUP :
620 XFS_DQ_PROJ), 650 XFS_DQ_PROJ),
621 XFS_QMOPT_DOWARN, &dqp); 651 XFS_QMOPT_DOWARN, &dqp);
652
622 if (!error) { 653 if (!error) {
623 xfs_disk_dquot_t *ddqp = &dqp->q_core; 654 xfs_disk_dquot_t *ddqp = &dqp->q_core;
624 655
@@ -639,13 +670,6 @@ xfs_qm_init_quotainfo(
639 be16_to_cpu(ddqp->d_iwarns) : XFS_QM_IWARNLIMIT; 670 be16_to_cpu(ddqp->d_iwarns) : XFS_QM_IWARNLIMIT;
640 qinf->qi_rtbwarnlimit = ddqp->d_rtbwarns ? 671 qinf->qi_rtbwarnlimit = ddqp->d_rtbwarns ?
641 be16_to_cpu(ddqp->d_rtbwarns) : XFS_QM_RTBWARNLIMIT; 672 be16_to_cpu(ddqp->d_rtbwarns) : XFS_QM_RTBWARNLIMIT;
642 qinf->qi_bhardlimit = be64_to_cpu(ddqp->d_blk_hardlimit);
643 qinf->qi_bsoftlimit = be64_to_cpu(ddqp->d_blk_softlimit);
644 qinf->qi_ihardlimit = be64_to_cpu(ddqp->d_ino_hardlimit);
645 qinf->qi_isoftlimit = be64_to_cpu(ddqp->d_ino_softlimit);
646 qinf->qi_rtbhardlimit = be64_to_cpu(ddqp->d_rtb_hardlimit);
647 qinf->qi_rtbsoftlimit = be64_to_cpu(ddqp->d_rtb_softlimit);
648
649 xfs_qm_dqdestroy(dqp); 673 xfs_qm_dqdestroy(dqp);
650 } else { 674 } else {
651 qinf->qi_btimelimit = XFS_QM_BTIMELIMIT; 675 qinf->qi_btimelimit = XFS_QM_BTIMELIMIT;
@@ -656,6 +680,13 @@ xfs_qm_init_quotainfo(
656 qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT; 680 qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT;
657 } 681 }
658 682
683 if (XFS_IS_UQUOTA_RUNNING(mp))
684 xfs_qm_set_defquota(mp, XFS_DQ_USER, qinf);
685 if (XFS_IS_GQUOTA_RUNNING(mp))
686 xfs_qm_set_defquota(mp, XFS_DQ_GROUP, qinf);
687 if (XFS_IS_PQUOTA_RUNNING(mp))
688 xfs_qm_set_defquota(mp, XFS_DQ_PROJ, qinf);
689
659 qinf->qi_shrinker.count_objects = xfs_qm_shrink_count; 690 qinf->qi_shrinker.count_objects = xfs_qm_shrink_count;
660 qinf->qi_shrinker.scan_objects = xfs_qm_shrink_scan; 691 qinf->qi_shrinker.scan_objects = xfs_qm_shrink_scan;
661 qinf->qi_shrinker.seeks = DEFAULT_SEEKS; 692 qinf->qi_shrinker.seeks = DEFAULT_SEEKS;
diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h
index 996a04064894..2975a822e9f0 100644
--- a/fs/xfs/xfs_qm.h
+++ b/fs/xfs/xfs_qm.h
@@ -53,6 +53,15 @@ extern struct kmem_zone *xfs_qm_dqtrxzone;
53 */ 53 */
54#define XFS_DQUOT_CLUSTER_SIZE_FSB (xfs_filblks_t)1 54#define XFS_DQUOT_CLUSTER_SIZE_FSB (xfs_filblks_t)1
55 55
56struct xfs_def_quota {
57 xfs_qcnt_t bhardlimit; /* default data blk hard limit */
58 xfs_qcnt_t bsoftlimit; /* default data blk soft limit */
59 xfs_qcnt_t ihardlimit; /* default inode count hard limit */
60 xfs_qcnt_t isoftlimit; /* default inode count soft limit */
61 xfs_qcnt_t rtbhardlimit; /* default realtime blk hard limit */
62 xfs_qcnt_t rtbsoftlimit; /* default realtime blk soft limit */
63};
64
56/* 65/*
57 * Various quota information for individual filesystems. 66 * Various quota information for individual filesystems.
58 * The mount structure keeps a pointer to this. 67 * The mount structure keeps a pointer to this.
@@ -76,12 +85,9 @@ typedef struct xfs_quotainfo {
76 struct mutex qi_quotaofflock;/* to serialize quotaoff */ 85 struct mutex qi_quotaofflock;/* to serialize quotaoff */
77 xfs_filblks_t qi_dqchunklen; /* # BBs in a chunk of dqs */ 86 xfs_filblks_t qi_dqchunklen; /* # BBs in a chunk of dqs */
78 uint qi_dqperchunk; /* # ondisk dqs in above chunk */ 87 uint qi_dqperchunk; /* # ondisk dqs in above chunk */
79 xfs_qcnt_t qi_bhardlimit; /* default data blk hard limit */ 88 struct xfs_def_quota qi_usr_default;
80 xfs_qcnt_t qi_bsoftlimit; /* default data blk soft limit */ 89 struct xfs_def_quota qi_grp_default;
81 xfs_qcnt_t qi_ihardlimit; /* default inode count hard limit */ 90 struct xfs_def_quota qi_prj_default;
82 xfs_qcnt_t qi_isoftlimit; /* default inode count soft limit */
83 xfs_qcnt_t qi_rtbhardlimit;/* default realtime blk hard limit */
84 xfs_qcnt_t qi_rtbsoftlimit;/* default realtime blk soft limit */
85 struct shrinker qi_shrinker; 91 struct shrinker qi_shrinker;
86} xfs_quotainfo_t; 92} xfs_quotainfo_t;
87 93
@@ -104,15 +110,15 @@ xfs_dquot_tree(
104} 110}
105 111
106static inline struct xfs_inode * 112static inline struct xfs_inode *
107xfs_dq_to_quota_inode(struct xfs_dquot *dqp) 113xfs_quota_inode(xfs_mount_t *mp, uint dq_flags)
108{ 114{
109 switch (dqp->dq_flags & XFS_DQ_ALLTYPES) { 115 switch (dq_flags & XFS_DQ_ALLTYPES) {
110 case XFS_DQ_USER: 116 case XFS_DQ_USER:
111 return dqp->q_mount->m_quotainfo->qi_uquotaip; 117 return mp->m_quotainfo->qi_uquotaip;
112 case XFS_DQ_GROUP: 118 case XFS_DQ_GROUP:
113 return dqp->q_mount->m_quotainfo->qi_gquotaip; 119 return mp->m_quotainfo->qi_gquotaip;
114 case XFS_DQ_PROJ: 120 case XFS_DQ_PROJ:
115 return dqp->q_mount->m_quotainfo->qi_pquotaip; 121 return mp->m_quotainfo->qi_pquotaip;
116 default: 122 default:
117 ASSERT(0); 123 ASSERT(0);
118 } 124 }
@@ -164,11 +170,27 @@ extern void xfs_qm_dqrele_all_inodes(struct xfs_mount *, uint);
164 170
165/* quota ops */ 171/* quota ops */
166extern int xfs_qm_scall_trunc_qfiles(struct xfs_mount *, uint); 172extern int xfs_qm_scall_trunc_qfiles(struct xfs_mount *, uint);
167extern int xfs_qm_scall_getquota(struct xfs_mount *, xfs_dqid_t, 173extern int xfs_qm_scall_getquota(struct xfs_mount *, xfs_dqid_t *,
168 uint, struct qc_dqblk *); 174 uint, struct qc_dqblk *, uint);
169extern int xfs_qm_scall_setqlim(struct xfs_mount *, xfs_dqid_t, uint, 175extern int xfs_qm_scall_setqlim(struct xfs_mount *, xfs_dqid_t, uint,
170 struct qc_dqblk *); 176 struct qc_dqblk *);
171extern int xfs_qm_scall_quotaon(struct xfs_mount *, uint); 177extern int xfs_qm_scall_quotaon(struct xfs_mount *, uint);
172extern int xfs_qm_scall_quotaoff(struct xfs_mount *, uint); 178extern int xfs_qm_scall_quotaoff(struct xfs_mount *, uint);
173 179
180static inline struct xfs_def_quota *
181xfs_get_defquota(struct xfs_dquot *dqp, struct xfs_quotainfo *qi)
182{
183 struct xfs_def_quota *defq;
184
185 if (XFS_QM_ISUDQ(dqp))
186 defq = &qi->qi_usr_default;
187 else if (XFS_QM_ISGDQ(dqp))
188 defq = &qi->qi_grp_default;
189 else {
190 ASSERT(XFS_QM_ISPDQ(dqp));
191 defq = &qi->qi_prj_default;
192 }
193 return defq;
194}
195
174#endif /* __XFS_QM_H__ */ 196#endif /* __XFS_QM_H__ */
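
With the defaults split out per quota type, code that used to read qi_bhardlimit and friends now fetches the right set via xfs_get_defquota() with the dquot in hand. A usage sketch mirroring the enforcement path later in this patch (sketch_effective_bhardlimit is a hypothetical helper):

static xfs_qcnt_t
sketch_effective_bhardlimit(
	struct xfs_dquot	*dqp)
{
	struct xfs_def_quota	*defq;
	xfs_qcnt_t		limit;

	defq = xfs_get_defquota(dqp, dqp->q_mount->m_quotainfo);

	/* fall back to the per-type default if the dquot has no limit */
	limit = be64_to_cpu(dqp->q_core.d_blk_hardlimit);
	return limit ? limit : defq->bhardlimit;
}
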
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index 3640c6e896af..f4d0e0a8f517 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -404,6 +404,7 @@ xfs_qm_scall_setqlim(
404 struct xfs_disk_dquot *ddq; 404 struct xfs_disk_dquot *ddq;
405 struct xfs_dquot *dqp; 405 struct xfs_dquot *dqp;
406 struct xfs_trans *tp; 406 struct xfs_trans *tp;
407 struct xfs_def_quota *defq;
407 int error; 408 int error;
408 xfs_qcnt_t hard, soft; 409 xfs_qcnt_t hard, soft;
409 410
@@ -431,6 +432,8 @@ xfs_qm_scall_setqlim(
431 ASSERT(error != -ENOENT); 432 ASSERT(error != -ENOENT);
432 goto out_unlock; 433 goto out_unlock;
433 } 434 }
435
436 defq = xfs_get_defquota(dqp, q);
434 xfs_dqunlock(dqp); 437 xfs_dqunlock(dqp);
435 438
436 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM); 439 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM);
@@ -458,8 +461,8 @@ xfs_qm_scall_setqlim(
458 ddq->d_blk_softlimit = cpu_to_be64(soft); 461 ddq->d_blk_softlimit = cpu_to_be64(soft);
459 xfs_dquot_set_prealloc_limits(dqp); 462 xfs_dquot_set_prealloc_limits(dqp);
460 if (id == 0) { 463 if (id == 0) {
461 q->qi_bhardlimit = hard; 464 defq->bhardlimit = hard;
462 q->qi_bsoftlimit = soft; 465 defq->bsoftlimit = soft;
463 } 466 }
464 } else { 467 } else {
465 xfs_debug(mp, "blkhard %Ld < blksoft %Ld", hard, soft); 468 xfs_debug(mp, "blkhard %Ld < blksoft %Ld", hard, soft);
@@ -474,8 +477,8 @@ xfs_qm_scall_setqlim(
474 ddq->d_rtb_hardlimit = cpu_to_be64(hard); 477 ddq->d_rtb_hardlimit = cpu_to_be64(hard);
475 ddq->d_rtb_softlimit = cpu_to_be64(soft); 478 ddq->d_rtb_softlimit = cpu_to_be64(soft);
476 if (id == 0) { 479 if (id == 0) {
477 q->qi_rtbhardlimit = hard; 480 defq->rtbhardlimit = hard;
478 q->qi_rtbsoftlimit = soft; 481 defq->rtbsoftlimit = soft;
479 } 482 }
480 } else { 483 } else {
481 xfs_debug(mp, "rtbhard %Ld < rtbsoft %Ld", hard, soft); 484 xfs_debug(mp, "rtbhard %Ld < rtbsoft %Ld", hard, soft);
@@ -491,8 +494,8 @@ xfs_qm_scall_setqlim(
491 ddq->d_ino_hardlimit = cpu_to_be64(hard); 494 ddq->d_ino_hardlimit = cpu_to_be64(hard);
492 ddq->d_ino_softlimit = cpu_to_be64(soft); 495 ddq->d_ino_softlimit = cpu_to_be64(soft);
493 if (id == 0) { 496 if (id == 0) {
494 q->qi_ihardlimit = hard; 497 defq->ihardlimit = hard;
495 q->qi_isoftlimit = soft; 498 defq->isoftlimit = soft;
496 } 499 }
497 } else { 500 } else {
498 xfs_debug(mp, "ihard %Ld < isoft %Ld", hard, soft); 501 xfs_debug(mp, "ihard %Ld < isoft %Ld", hard, soft);
@@ -635,9 +638,10 @@ out:
635int 638int
636xfs_qm_scall_getquota( 639xfs_qm_scall_getquota(
637 struct xfs_mount *mp, 640 struct xfs_mount *mp,
638 xfs_dqid_t id, 641 xfs_dqid_t *id,
639 uint type, 642 uint type,
640 struct qc_dqblk *dst) 643 struct qc_dqblk *dst,
644 uint dqget_flags)
641{ 645{
642 struct xfs_dquot *dqp; 646 struct xfs_dquot *dqp;
643 int error; 647 int error;
@@ -647,7 +651,7 @@ xfs_qm_scall_getquota(
647 * we aren't passing the XFS_QMOPT_DOALLOC flag. If it doesn't 651 * we aren't passing the XFS_QMOPT_DOALLOC flag. If it doesn't
648 * exist, we'll get ENOENT back. 652 * exist, we'll get ENOENT back.
649 */ 653 */
650 error = xfs_qm_dqget(mp, NULL, id, type, 0, &dqp); 654 error = xfs_qm_dqget(mp, NULL, *id, type, dqget_flags, &dqp);
651 if (error) 655 if (error)
652 return error; 656 return error;
653 657
@@ -660,6 +664,9 @@ xfs_qm_scall_getquota(
660 goto out_put; 664 goto out_put;
661 } 665 }
662 666
667 /* Fill in the ID we actually read from disk */
668 *id = be32_to_cpu(dqp->q_core.d_id);
669
663 memset(dst, 0, sizeof(*dst)); 670 memset(dst, 0, sizeof(*dst));
664 dst->d_spc_hardlimit = 671 dst->d_spc_hardlimit =
665 XFS_FSB_TO_B(mp, be64_to_cpu(dqp->q_core.d_blk_hardlimit)); 672 XFS_FSB_TO_B(mp, be64_to_cpu(dqp->q_core.d_blk_hardlimit));
@@ -701,7 +708,7 @@ xfs_qm_scall_getquota(
701 if (((XFS_IS_UQUOTA_ENFORCED(mp) && type == XFS_DQ_USER) || 708 if (((XFS_IS_UQUOTA_ENFORCED(mp) && type == XFS_DQ_USER) ||
702 (XFS_IS_GQUOTA_ENFORCED(mp) && type == XFS_DQ_GROUP) || 709 (XFS_IS_GQUOTA_ENFORCED(mp) && type == XFS_DQ_GROUP) ||
703 (XFS_IS_PQUOTA_ENFORCED(mp) && type == XFS_DQ_PROJ)) && 710 (XFS_IS_PQUOTA_ENFORCED(mp) && type == XFS_DQ_PROJ)) &&
704 id != 0) { 711 *id != 0) {
705 if ((dst->d_space > dst->d_spc_softlimit) && 712 if ((dst->d_space > dst->d_spc_softlimit) &&
706 (dst->d_spc_softlimit > 0)) { 713 (dst->d_spc_softlimit > 0)) {
707 ASSERT(dst->d_spc_timer != 0); 714 ASSERT(dst->d_spc_timer != 0);
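
Passing the ID by reference plus the XFS_QMOPT_DQNEXT flag turns xfs_qm_scall_getquota() into an enumerator: it returns the next active dquot at or above the given ID and rewrites the ID to what it actually found. A hypothetical iteration sketch (the wrap guard is an assumption, not part of this patch):

static void
sketch_walk_user_quotas(
	struct xfs_mount	*mp)
{
	struct qc_dqblk		qdq;
	xfs_dqid_t		id = 0;

	while (!xfs_qm_scall_getquota(mp, &id, XFS_DQ_USER, &qdq,
				      XFS_QMOPT_DQNEXT)) {
		/* qdq and id now describe the next allocated user dquot */
		if (id == (xfs_dqid_t)-1)
			break;		/* hypothetical wrap guard */
		id++;			/* continue past the ID we just got */
	}
	/* the loop ends once no dquot with a higher ID exists */
}
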
diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c
index 7795e0d01382..f82d79a8c694 100644
--- a/fs/xfs/xfs_quotaops.c
+++ b/fs/xfs/xfs_quotaops.c
@@ -231,14 +231,45 @@ xfs_fs_get_dqblk(
231 struct qc_dqblk *qdq) 231 struct qc_dqblk *qdq)
232{ 232{
233 struct xfs_mount *mp = XFS_M(sb); 233 struct xfs_mount *mp = XFS_M(sb);
234 xfs_dqid_t id;
234 235
235 if (!XFS_IS_QUOTA_RUNNING(mp)) 236 if (!XFS_IS_QUOTA_RUNNING(mp))
236 return -ENOSYS; 237 return -ENOSYS;
237 if (!XFS_IS_QUOTA_ON(mp)) 238 if (!XFS_IS_QUOTA_ON(mp))
238 return -ESRCH; 239 return -ESRCH;
239 240
240 return xfs_qm_scall_getquota(mp, from_kqid(&init_user_ns, qid), 241 id = from_kqid(&init_user_ns, qid);
241 xfs_quota_type(qid.type), qdq); 242 return xfs_qm_scall_getquota(mp, &id,
243 xfs_quota_type(qid.type), qdq, 0);
244}
245
246/* Return quota info for active quota >= this qid */
247STATIC int
248xfs_fs_get_nextdqblk(
249 struct super_block *sb,
250 struct kqid *qid,
251 struct qc_dqblk *qdq)
252{
253 int ret;
254 struct xfs_mount *mp = XFS_M(sb);
255 xfs_dqid_t id;
256
257 if (!XFS_IS_QUOTA_RUNNING(mp))
258 return -ENOSYS;
259 if (!XFS_IS_QUOTA_ON(mp))
260 return -ESRCH;
261
262 id = from_kqid(&init_user_ns, *qid);
263 ret = xfs_qm_scall_getquota(mp, &id,
264 xfs_quota_type(qid->type), qdq,
265 XFS_QMOPT_DQNEXT);
266 if (ret)
267 return ret;
268
269 /* ID may be different, so convert back the ID we actually got */
270 *qid = make_kqid(current_user_ns(), qid->type, id);
271 return 0;
272
242} 273}
243 274
244STATIC int 275STATIC int
@@ -267,5 +298,6 @@ const struct quotactl_ops xfs_quotactl_operations = {
267 .quota_disable = xfs_quota_disable, 298 .quota_disable = xfs_quota_disable,
268 .rm_xquota = xfs_fs_rm_xquota, 299 .rm_xquota = xfs_fs_rm_xquota,
269 .get_dqblk = xfs_fs_get_dqblk, 300 .get_dqblk = xfs_fs_get_dqblk,
301 .get_nextdqblk = xfs_fs_get_nextdqblk,
270 .set_dqblk = xfs_fs_set_dqblk, 302 .set_dqblk = xfs_fs_set_dqblk,
271}; 303};
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 391d797cb53f..c8d58426008e 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -1296,11 +1296,7 @@ DEFINE_IOMAP_EVENT(xfs_map_blocks_found);
1296DEFINE_IOMAP_EVENT(xfs_map_blocks_alloc); 1296DEFINE_IOMAP_EVENT(xfs_map_blocks_alloc);
1297DEFINE_IOMAP_EVENT(xfs_get_blocks_found); 1297DEFINE_IOMAP_EVENT(xfs_get_blocks_found);
1298DEFINE_IOMAP_EVENT(xfs_get_blocks_alloc); 1298DEFINE_IOMAP_EVENT(xfs_get_blocks_alloc);
1299DEFINE_IOMAP_EVENT(xfs_gbmap_direct); 1299DEFINE_IOMAP_EVENT(xfs_get_blocks_map_direct);
1300DEFINE_IOMAP_EVENT(xfs_gbmap_direct_new);
1301DEFINE_IOMAP_EVENT(xfs_gbmap_direct_update);
1302DEFINE_IOMAP_EVENT(xfs_gbmap_direct_none);
1303DEFINE_IOMAP_EVENT(xfs_gbmap_direct_endio);
1304 1300
1305DECLARE_EVENT_CLASS(xfs_simple_io_class, 1301DECLARE_EVENT_CLASS(xfs_simple_io_class,
1306 TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count), 1302 TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count),
@@ -1340,6 +1336,9 @@ DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert);
1340DEFINE_SIMPLE_IO_EVENT(xfs_get_blocks_notfound); 1336DEFINE_SIMPLE_IO_EVENT(xfs_get_blocks_notfound);
1341DEFINE_SIMPLE_IO_EVENT(xfs_setfilesize); 1337DEFINE_SIMPLE_IO_EVENT(xfs_setfilesize);
1342DEFINE_SIMPLE_IO_EVENT(xfs_zero_eof); 1338DEFINE_SIMPLE_IO_EVENT(xfs_zero_eof);
1339DEFINE_SIMPLE_IO_EVENT(xfs_end_io_direct_write);
1340DEFINE_SIMPLE_IO_EVENT(xfs_end_io_direct_write_unwritten);
1341DEFINE_SIMPLE_IO_EVENT(xfs_end_io_direct_write_append);
1343 1342
1344DECLARE_EVENT_CLASS(xfs_itrunc_class, 1343DECLARE_EVENT_CLASS(xfs_itrunc_class,
1345 TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), 1344 TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size),
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 4f18fd92ca13..d6c9c3e9e02b 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -497,6 +497,7 @@ xfsaild(
497 long tout = 0; /* milliseconds */ 497 long tout = 0; /* milliseconds */
498 498
499 current->flags |= PF_MEMALLOC; 499 current->flags |= PF_MEMALLOC;
500 set_freezable();
500 501
501 while (!kthread_should_stop()) { 502 while (!kthread_should_stop()) {
502 if (tout && tout <= 20) 503 if (tout && tout <= 20)
@@ -519,14 +520,14 @@ xfsaild(
519 if (!xfs_ail_min(ailp) && 520 if (!xfs_ail_min(ailp) &&
520 ailp->xa_target == ailp->xa_target_prev) { 521 ailp->xa_target == ailp->xa_target_prev) {
521 spin_unlock(&ailp->xa_lock); 522 spin_unlock(&ailp->xa_lock);
522 schedule(); 523 freezable_schedule();
523 tout = 0; 524 tout = 0;
524 continue; 525 continue;
525 } 526 }
526 spin_unlock(&ailp->xa_lock); 527 spin_unlock(&ailp->xa_lock);
527 528
528 if (tout) 529 if (tout)
529 schedule_timeout(msecs_to_jiffies(tout)); 530 freezable_schedule_timeout(msecs_to_jiffies(tout));
530 531
531 __set_current_state(TASK_RUNNING); 532 __set_current_state(TASK_RUNNING);
532 533
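
The xfsaild() change follows the standard freezable-kthread pattern: opt in once with set_freezable(), then use the freezer-aware schedule variants at every sleep site so the freezer can park the thread during suspend. A generic sketch of the pattern, assuming only the stock freezer and kthread APIs:

#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/sched.h>

static int
sketch_freezable_worker(
	void		*data)
{
	long		tout = 0;	/* ms; 0 means sleep until woken */

	set_freezable();		/* opt in to the freezer */

	while (!kthread_should_stop()) {
		set_current_state(TASK_INTERRUPTIBLE);

		if (!tout)
			freezable_schedule();
		else
			freezable_schedule_timeout(msecs_to_jiffies(tout));

		__set_current_state(TASK_RUNNING);
		/* ... do one pass of work, recompute tout ... */
	}
	return 0;
}
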
diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c
index 995170194df0..c3d547211d16 100644
--- a/fs/xfs/xfs_trans_dquot.c
+++ b/fs/xfs/xfs_trans_dquot.c
@@ -609,17 +609,20 @@ xfs_trans_dqresv(
609 xfs_qcnt_t total_count; 609 xfs_qcnt_t total_count;
610 xfs_qcnt_t *resbcountp; 610 xfs_qcnt_t *resbcountp;
611 xfs_quotainfo_t *q = mp->m_quotainfo; 611 xfs_quotainfo_t *q = mp->m_quotainfo;
612 struct xfs_def_quota *defq;
612 613
613 614
614 xfs_dqlock(dqp); 615 xfs_dqlock(dqp);
615 616
617 defq = xfs_get_defquota(dqp, q);
618
616 if (flags & XFS_TRANS_DQ_RES_BLKS) { 619 if (flags & XFS_TRANS_DQ_RES_BLKS) {
617 hardlimit = be64_to_cpu(dqp->q_core.d_blk_hardlimit); 620 hardlimit = be64_to_cpu(dqp->q_core.d_blk_hardlimit);
618 if (!hardlimit) 621 if (!hardlimit)
619 hardlimit = q->qi_bhardlimit; 622 hardlimit = defq->bhardlimit;
620 softlimit = be64_to_cpu(dqp->q_core.d_blk_softlimit); 623 softlimit = be64_to_cpu(dqp->q_core.d_blk_softlimit);
621 if (!softlimit) 624 if (!softlimit)
622 softlimit = q->qi_bsoftlimit; 625 softlimit = defq->bsoftlimit;
623 timer = be32_to_cpu(dqp->q_core.d_btimer); 626 timer = be32_to_cpu(dqp->q_core.d_btimer);
624 warns = be16_to_cpu(dqp->q_core.d_bwarns); 627 warns = be16_to_cpu(dqp->q_core.d_bwarns);
625 warnlimit = dqp->q_mount->m_quotainfo->qi_bwarnlimit; 628 warnlimit = dqp->q_mount->m_quotainfo->qi_bwarnlimit;
@@ -628,10 +631,10 @@ xfs_trans_dqresv(
628 ASSERT(flags & XFS_TRANS_DQ_RES_RTBLKS); 631 ASSERT(flags & XFS_TRANS_DQ_RES_RTBLKS);
629 hardlimit = be64_to_cpu(dqp->q_core.d_rtb_hardlimit); 632 hardlimit = be64_to_cpu(dqp->q_core.d_rtb_hardlimit);
630 if (!hardlimit) 633 if (!hardlimit)
631 hardlimit = q->qi_rtbhardlimit; 634 hardlimit = defq->rtbhardlimit;
632 softlimit = be64_to_cpu(dqp->q_core.d_rtb_softlimit); 635 softlimit = be64_to_cpu(dqp->q_core.d_rtb_softlimit);
633 if (!softlimit) 636 if (!softlimit)
634 softlimit = q->qi_rtbsoftlimit; 637 softlimit = defq->rtbsoftlimit;
635 timer = be32_to_cpu(dqp->q_core.d_rtbtimer); 638 timer = be32_to_cpu(dqp->q_core.d_rtbtimer);
636 warns = be16_to_cpu(dqp->q_core.d_rtbwarns); 639 warns = be16_to_cpu(dqp->q_core.d_rtbwarns);
637 warnlimit = dqp->q_mount->m_quotainfo->qi_rtbwarnlimit; 640 warnlimit = dqp->q_mount->m_quotainfo->qi_rtbwarnlimit;
@@ -672,10 +675,10 @@ xfs_trans_dqresv(
672 warnlimit = dqp->q_mount->m_quotainfo->qi_iwarnlimit; 675 warnlimit = dqp->q_mount->m_quotainfo->qi_iwarnlimit;
673 hardlimit = be64_to_cpu(dqp->q_core.d_ino_hardlimit); 676 hardlimit = be64_to_cpu(dqp->q_core.d_ino_hardlimit);
674 if (!hardlimit) 677 if (!hardlimit)
675 hardlimit = q->qi_ihardlimit; 678 hardlimit = defq->ihardlimit;
676 softlimit = be64_to_cpu(dqp->q_core.d_ino_softlimit); 679 softlimit = be64_to_cpu(dqp->q_core.d_ino_softlimit);
677 if (!softlimit) 680 if (!softlimit)
678 softlimit = q->qi_isoftlimit; 681 softlimit = defq->isoftlimit;
679 682
680 if (hardlimit && total_count > hardlimit) { 683 if (hardlimit && total_count > hardlimit) {
681 xfs_quota_warn(mp, dqp, QUOTA_NL_IHARDWARN); 684 xfs_quota_warn(mp, dqp, QUOTA_NL_IHARDWARN);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 1a2046275cdf..d7f37bfcbdce 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -70,7 +70,7 @@ extern int sysctl_protected_hardlinks;
70struct buffer_head; 70struct buffer_head;
71typedef int (get_block_t)(struct inode *inode, sector_t iblock, 71typedef int (get_block_t)(struct inode *inode, sector_t iblock,
72 struct buffer_head *bh_result, int create); 72 struct buffer_head *bh_result, int create);
73typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset, 73typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
74 ssize_t bytes, void *private); 74 ssize_t bytes, void *private);
75typedef void (dax_iodone_t)(struct buffer_head *bh_map, int uptodate); 75typedef void (dax_iodone_t)(struct buffer_head *bh_map, int uptodate);
76 76
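
With dio_iodone_t returning int, a filesystem completion handler can report a failure (say, of unwritten-extent conversion) and the direct I/O core folds it into the final return value. A hypothetical handler under the new signature (myfs_end_io and myfs_convert_unwritten are assumed names, not a real API):

static int
myfs_end_io(struct kiocb *iocb, loff_t offset, ssize_t size, void *private)
{
	struct inode	*inode = file_inode(iocb->ki_filp);

	if (size <= 0)
		return 0;	/* nothing was transferred, nothing to do */

	/* propagate conversion failures back to the dio core */
	return myfs_convert_unwritten(inode, offset, size);
}
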
diff --git a/include/linux/quota.h b/include/linux/quota.h
index b2505acfd3c0..fba92f5c1a63 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -425,6 +425,8 @@ struct quotactl_ops {
425 int (*quota_sync)(struct super_block *, int); 425 int (*quota_sync)(struct super_block *, int);
426 int (*set_info)(struct super_block *, int, struct qc_info *); 426 int (*set_info)(struct super_block *, int, struct qc_info *);
427 int (*get_dqblk)(struct super_block *, struct kqid, struct qc_dqblk *); 427 int (*get_dqblk)(struct super_block *, struct kqid, struct qc_dqblk *);
428 int (*get_nextdqblk)(struct super_block *, struct kqid *,
429 struct qc_dqblk *);
428 int (*set_dqblk)(struct super_block *, struct kqid, struct qc_dqblk *); 430 int (*set_dqblk)(struct super_block *, struct kqid, struct qc_dqblk *);
429 int (*get_state)(struct super_block *, struct qc_state *); 431 int (*get_state)(struct super_block *, struct qc_state *);
430 int (*rm_xquota)(struct super_block *, unsigned int); 432 int (*rm_xquota)(struct super_block *, unsigned int);
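
The new callback takes the kqid by pointer because it is an in/out argument: the filesystem looks up the next active quota at or above the passed qid and writes back the qid it found. A sketch of the contract from the caller's side (sketch_next_quota is a hypothetical helper, assuming only the ops table above):

static int
sketch_next_quota(struct super_block *sb, struct kqid *qid,
		  struct qc_dqblk *qdq)
{
	if (!sb->s_qcop->get_nextdqblk)
		return -ENOSYS;		/* filesystem has no enumerator */
	return sb->s_qcop->get_nextdqblk(sb, qid, qdq);
}
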
diff --git a/include/uapi/linux/dqblk_xfs.h b/include/uapi/linux/dqblk_xfs.h
index dcd75cc26196..11b3b31faf14 100644
--- a/include/uapi/linux/dqblk_xfs.h
+++ b/include/uapi/linux/dqblk_xfs.h
@@ -39,6 +39,7 @@
39#define Q_XQUOTARM XQM_CMD(6) /* free disk space used by dquots */ 39#define Q_XQUOTARM XQM_CMD(6) /* free disk space used by dquots */
40#define Q_XQUOTASYNC XQM_CMD(7) /* delalloc flush, updates dquots */ 40#define Q_XQUOTASYNC XQM_CMD(7) /* delalloc flush, updates dquots */
41#define Q_XGETQSTATV XQM_CMD(8) /* newer version of get quota */ 41#define Q_XGETQSTATV XQM_CMD(8) /* newer version of get quota */
42#define Q_XGETNEXTQUOTA XQM_CMD(9) /* get disk limits and usage >= ID */
42 43
43/* 44/*
44 * fs_disk_quota structure: 45 * fs_disk_quota structure:
diff --git a/include/uapi/linux/quota.h b/include/uapi/linux/quota.h
index 9c95b2c1c88a..38baddb807f5 100644
--- a/include/uapi/linux/quota.h
+++ b/include/uapi/linux/quota.h
@@ -71,6 +71,7 @@
71#define Q_SETINFO 0x800006 /* set information about quota files */ 71#define Q_SETINFO 0x800006 /* set information about quota files */
72#define Q_GETQUOTA 0x800007 /* get user quota structure */ 72#define Q_GETQUOTA 0x800007 /* get user quota structure */
73#define Q_SETQUOTA 0x800008 /* set user quota structure */ 73#define Q_SETQUOTA 0x800008 /* set user quota structure */
74#define Q_GETNEXTQUOTA 0x800009 /* get disk limits and usage >= ID */
74 75
75/* Quota format type IDs */ 76/* Quota format type IDs */
76#define QFMT_VFS_OLD 1 77#define QFMT_VFS_OLD 1
@@ -119,6 +120,19 @@ struct if_dqblk {
119 __u32 dqb_valid; 120 __u32 dqb_valid;
120}; 121};
121 122
123struct if_nextdqblk {
124 __u64 dqb_bhardlimit;
125 __u64 dqb_bsoftlimit;
126 __u64 dqb_curspace;
127 __u64 dqb_ihardlimit;
128 __u64 dqb_isoftlimit;
129 __u64 dqb_curinodes;
130 __u64 dqb_btime;
131 __u64 dqb_itime;
132 __u32 dqb_valid;
133 __u32 dqb_id;
134};
135
122/* 136/*
123 * Structure used for setting quota information about file via quotactl 137 * Structure used for setting quota information about file via quotactl
124 * Following flags are used to specify which fields are valid 138 * Following flags are used to specify which fields are valid
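
Together, the new command and structure let userspace enumerate quotas without probing IDs one by one: the kernel fills dqb_id with the ID it actually found. A hypothetical userspace sketch, assuming the usual quotactl(2) calling convention and a kernel that carries this patch:

#include <stdint.h>
#include <stdio.h>
#include <sys/quota.h>		/* quotactl(), QCMD(), USRQUOTA */

#ifndef Q_GETNEXTQUOTA
#define Q_GETNEXTQUOTA	0x800009	/* value from the hunk above */
/* layout mirrors struct if_nextdqblk from the hunk above */
struct if_nextdqblk {
	uint64_t dqb_bhardlimit, dqb_bsoftlimit, dqb_curspace;
	uint64_t dqb_ihardlimit, dqb_isoftlimit, dqb_curinodes;
	uint64_t dqb_btime, dqb_itime;
	uint32_t dqb_valid, dqb_id;
};
#endif

/* print every active user quota on 'dev', starting from ID 0 */
static void
walk_user_quotas(const char *dev)
{
	struct if_nextdqblk	q;
	unsigned int		id = 0;

	while (quotactl(QCMD(Q_GETNEXTQUOTA, USRQUOTA), dev, id,
			(caddr_t)&q) == 0) {
		printf("id %u: %llu bytes in use\n", q.dqb_id,
		       (unsigned long long)q.dqb_curspace);
		id = q.dqb_id + 1;	/* advance past the ID we got */
	}
	/* quotactl() fails (e.g. ENOENT) once no higher ID exists */
}
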