aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-01-27 15:44:32 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2017-01-27 15:44:32 -0500
commit3365135d43f861003555c963b309672d053a2228 (patch)
treebfef4adec5da118bf1b3df7e5cff74f45af9e02d
parent5906374446386fd16fe562b042429d905d231ec3 (diff)
parente0d76fa4475ef2cf4b52d18588b8ce95153d021b (diff)
Merge tag 'xfs-for-linus-4.10-rc6-5' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux
Pull xfs updates from Darrick Wong: "I have some more fixes this week: better input validation, corruption avoidance, build fixes, memory leak fixes, and a couple from Christoph to avoid an ENOSPC failure. Summary: - Fix race conditions in the CoW code - Fix some incorrect input validation checks - Avoid crashing fs by running out of space when freeing inodes - Fix toctou race wrt whether or not an inode has an attr - Fix build error on arm - Fix page refcount corruption when readahead fails - Don't corrupt userspace in the bmap ioctl" * tag 'xfs-for-linus-4.10-rc6-5' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: xfs: prevent quotacheck from overloading inode lru xfs: fix bmv_count confusion w/ shared extents xfs: clear _XBF_PAGES from buffers when readahead page xfs: extsize hints are not unlikely in xfs_bmap_btalloc xfs: remove racy hasattr check from attr ops xfs: use per-AG reservations for the finobt xfs: only update mount/resv fields on success in __xfs_ag_resv_init xfs: verify dirblocklog correctly xfs: fix COW writeback race
-rw-r--r--fs/xfs/libxfs/xfs_ag_resv.c70
-rw-r--r--fs/xfs/libxfs/xfs_attr.c6
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c48
-rw-r--r--fs/xfs/libxfs/xfs_bmap.h6
-rw-r--r--fs/xfs/libxfs/xfs_ialloc_btree.c90
-rw-r--r--fs/xfs/libxfs/xfs_ialloc_btree.h3
-rw-r--r--fs/xfs/libxfs/xfs_sb.c2
-rw-r--r--fs/xfs/xfs_bmap_util.c28
-rw-r--r--fs/xfs/xfs_buf.c1
-rw-r--r--fs/xfs/xfs_inode.c23
-rw-r--r--fs/xfs/xfs_iomap.c2
-rw-r--r--fs/xfs/xfs_mount.h1
-rw-r--r--fs/xfs/xfs_qm.c3
13 files changed, 220 insertions, 63 deletions
diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c
index d346d42c54d1..33db69be4832 100644
--- a/fs/xfs/libxfs/xfs_ag_resv.c
+++ b/fs/xfs/libxfs/xfs_ag_resv.c
@@ -39,6 +39,7 @@
39#include "xfs_rmap_btree.h" 39#include "xfs_rmap_btree.h"
40#include "xfs_btree.h" 40#include "xfs_btree.h"
41#include "xfs_refcount_btree.h" 41#include "xfs_refcount_btree.h"
42#include "xfs_ialloc_btree.h"
42 43
43/* 44/*
44 * Per-AG Block Reservations 45 * Per-AG Block Reservations
@@ -200,22 +201,30 @@ __xfs_ag_resv_init(
200 struct xfs_mount *mp = pag->pag_mount; 201 struct xfs_mount *mp = pag->pag_mount;
201 struct xfs_ag_resv *resv; 202 struct xfs_ag_resv *resv;
202 int error; 203 int error;
204 xfs_extlen_t reserved;
203 205
204 resv = xfs_perag_resv(pag, type);
205 if (used > ask) 206 if (used > ask)
206 ask = used; 207 ask = used;
207 resv->ar_asked = ask; 208 reserved = ask - used;
208 resv->ar_reserved = resv->ar_orig_reserved = ask - used;
209 mp->m_ag_max_usable -= ask;
210 209
211 trace_xfs_ag_resv_init(pag, type, ask); 210 error = xfs_mod_fdblocks(mp, -(int64_t)reserved, true);
212 211 if (error) {
213 error = xfs_mod_fdblocks(mp, -(int64_t)resv->ar_reserved, true);
214 if (error)
215 trace_xfs_ag_resv_init_error(pag->pag_mount, pag->pag_agno, 212 trace_xfs_ag_resv_init_error(pag->pag_mount, pag->pag_agno,
216 error, _RET_IP_); 213 error, _RET_IP_);
214 xfs_warn(mp,
215"Per-AG reservation for AG %u failed. Filesystem may run out of space.",
216 pag->pag_agno);
217 return error;
218 }
217 219
218 return error; 220 mp->m_ag_max_usable -= ask;
221
222 resv = xfs_perag_resv(pag, type);
223 resv->ar_asked = ask;
224 resv->ar_reserved = resv->ar_orig_reserved = reserved;
225
226 trace_xfs_ag_resv_init(pag, type, ask);
227 return 0;
219} 228}
220 229
221/* Create a per-AG block reservation. */ 230/* Create a per-AG block reservation. */
@@ -223,6 +232,8 @@ int
223xfs_ag_resv_init( 232xfs_ag_resv_init(
224 struct xfs_perag *pag) 233 struct xfs_perag *pag)
225{ 234{
235 struct xfs_mount *mp = pag->pag_mount;
236 xfs_agnumber_t agno = pag->pag_agno;
226 xfs_extlen_t ask; 237 xfs_extlen_t ask;
227 xfs_extlen_t used; 238 xfs_extlen_t used;
228 int error = 0; 239 int error = 0;
@@ -231,23 +242,45 @@ xfs_ag_resv_init(
231 if (pag->pag_meta_resv.ar_asked == 0) { 242 if (pag->pag_meta_resv.ar_asked == 0) {
232 ask = used = 0; 243 ask = used = 0;
233 244
234 error = xfs_refcountbt_calc_reserves(pag->pag_mount, 245 error = xfs_refcountbt_calc_reserves(mp, agno, &ask, &used);
235 pag->pag_agno, &ask, &used);
236 if (error) 246 if (error)
237 goto out; 247 goto out;
238 248
239 error = __xfs_ag_resv_init(pag, XFS_AG_RESV_METADATA, 249 error = xfs_finobt_calc_reserves(mp, agno, &ask, &used);
240 ask, used);
241 if (error) 250 if (error)
242 goto out; 251 goto out;
252
253 error = __xfs_ag_resv_init(pag, XFS_AG_RESV_METADATA,
254 ask, used);
255 if (error) {
256 /*
257 * Because we didn't have per-AG reservations when the
258 * finobt feature was added we might not be able to
259 * reserve all needed blocks. Warn and fall back to the
260 * old and potentially buggy code in that case, but
261 * ensure we do have the reservation for the refcountbt.
262 */
263 ask = used = 0;
264
265 mp->m_inotbt_nores = true;
266
267 error = xfs_refcountbt_calc_reserves(mp, agno, &ask,
268 &used);
269 if (error)
270 goto out;
271
272 error = __xfs_ag_resv_init(pag, XFS_AG_RESV_METADATA,
273 ask, used);
274 if (error)
275 goto out;
276 }
243 } 277 }
244 278
245 /* Create the AGFL metadata reservation */ 279 /* Create the AGFL metadata reservation */
246 if (pag->pag_agfl_resv.ar_asked == 0) { 280 if (pag->pag_agfl_resv.ar_asked == 0) {
247 ask = used = 0; 281 ask = used = 0;
248 282
249 error = xfs_rmapbt_calc_reserves(pag->pag_mount, pag->pag_agno, 283 error = xfs_rmapbt_calc_reserves(mp, agno, &ask, &used);
250 &ask, &used);
251 if (error) 284 if (error)
252 goto out; 285 goto out;
253 286
@@ -256,9 +289,16 @@ xfs_ag_resv_init(
256 goto out; 289 goto out;
257 } 290 }
258 291
292#ifdef DEBUG
293 /* need to read in the AGF for the ASSERT below to work */
294 error = xfs_alloc_pagf_init(pag->pag_mount, NULL, pag->pag_agno, 0);
295 if (error)
296 return error;
297
259 ASSERT(xfs_perag_resv(pag, XFS_AG_RESV_METADATA)->ar_reserved + 298 ASSERT(xfs_perag_resv(pag, XFS_AG_RESV_METADATA)->ar_reserved +
260 xfs_perag_resv(pag, XFS_AG_RESV_AGFL)->ar_reserved <= 299 xfs_perag_resv(pag, XFS_AG_RESV_AGFL)->ar_reserved <=
261 pag->pagf_freeblks + pag->pagf_flcount); 300 pag->pagf_freeblks + pag->pagf_flcount);
301#endif
262out: 302out:
263 return error; 303 return error;
264} 304}
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index af1ecb19121e..6622d46ddec3 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -131,9 +131,6 @@ xfs_attr_get(
131 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 131 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
132 return -EIO; 132 return -EIO;
133 133
134 if (!xfs_inode_hasattr(ip))
135 return -ENOATTR;
136
137 error = xfs_attr_args_init(&args, ip, name, flags); 134 error = xfs_attr_args_init(&args, ip, name, flags);
138 if (error) 135 if (error)
139 return error; 136 return error;
@@ -392,9 +389,6 @@ xfs_attr_remove(
392 if (XFS_FORCED_SHUTDOWN(dp->i_mount)) 389 if (XFS_FORCED_SHUTDOWN(dp->i_mount))
393 return -EIO; 390 return -EIO;
394 391
395 if (!xfs_inode_hasattr(dp))
396 return -ENOATTR;
397
398 error = xfs_attr_args_init(&args, dp, name, flags); 392 error = xfs_attr_args_init(&args, dp, name, flags);
399 if (error) 393 if (error)
400 return error; 394 return error;
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 44773c9eb957..bfc00de5c6f1 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -3629,7 +3629,7 @@ xfs_bmap_btalloc(
3629 align = xfs_get_cowextsz_hint(ap->ip); 3629 align = xfs_get_cowextsz_hint(ap->ip);
3630 else if (xfs_alloc_is_userdata(ap->datatype)) 3630 else if (xfs_alloc_is_userdata(ap->datatype))
3631 align = xfs_get_extsz_hint(ap->ip); 3631 align = xfs_get_extsz_hint(ap->ip);
3632 if (unlikely(align)) { 3632 if (align) {
3633 error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, 3633 error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev,
3634 align, 0, ap->eof, 0, ap->conv, 3634 align, 0, ap->eof, 0, ap->conv,
3635 &ap->offset, &ap->length); 3635 &ap->offset, &ap->length);
@@ -3701,7 +3701,7 @@ xfs_bmap_btalloc(
3701 args.minlen = ap->minlen; 3701 args.minlen = ap->minlen;
3702 } 3702 }
3703 /* apply extent size hints if obtained earlier */ 3703 /* apply extent size hints if obtained earlier */
3704 if (unlikely(align)) { 3704 if (align) {
3705 args.prod = align; 3705 args.prod = align;
3706 if ((args.mod = (xfs_extlen_t)do_mod(ap->offset, args.prod))) 3706 if ((args.mod = (xfs_extlen_t)do_mod(ap->offset, args.prod)))
3707 args.mod = (xfs_extlen_t)(args.prod - args.mod); 3707 args.mod = (xfs_extlen_t)(args.prod - args.mod);
@@ -4514,8 +4514,6 @@ xfs_bmapi_write(
4514 int n; /* current extent index */ 4514 int n; /* current extent index */
4515 xfs_fileoff_t obno; /* old block number (offset) */ 4515 xfs_fileoff_t obno; /* old block number (offset) */
4516 int whichfork; /* data or attr fork */ 4516 int whichfork; /* data or attr fork */
4517 char inhole; /* current location is hole in file */
4518 char wasdelay; /* old extent was delayed */
4519 4517
4520#ifdef DEBUG 4518#ifdef DEBUG
4521 xfs_fileoff_t orig_bno; /* original block number value */ 4519 xfs_fileoff_t orig_bno; /* original block number value */
@@ -4603,22 +4601,44 @@ xfs_bmapi_write(
4603 bma.firstblock = firstblock; 4601 bma.firstblock = firstblock;
4604 4602
4605 while (bno < end && n < *nmap) { 4603 while (bno < end && n < *nmap) {
4606 inhole = eof || bma.got.br_startoff > bno; 4604 bool need_alloc = false, wasdelay = false;
4607 wasdelay = !inhole && isnullstartblock(bma.got.br_startblock);
4608 4605
4609 /* 4606 /* in hole or beyond EOF? */
4610 * Make sure we only reflink into a hole. 4607 if (eof || bma.got.br_startoff > bno) {
4611 */ 4608 if (flags & XFS_BMAPI_DELALLOC) {
4612 if (flags & XFS_BMAPI_REMAP) 4609 /*
4613 ASSERT(inhole); 4610 * For the COW fork we can reasonably get a
4614 if (flags & XFS_BMAPI_COWFORK) 4611 * request for converting an extent that races
4615 ASSERT(!inhole); 4612 * with other threads already having converted
4613 * part of it, as there converting COW to
4614 * regular blocks is not protected using the
4615 * IOLOCK.
4616 */
4617 ASSERT(flags & XFS_BMAPI_COWFORK);
4618 if (!(flags & XFS_BMAPI_COWFORK)) {
4619 error = -EIO;
4620 goto error0;
4621 }
4622
4623 if (eof || bno >= end)
4624 break;
4625 } else {
4626 need_alloc = true;
4627 }
4628 } else {
4629 /*
4630 * Make sure we only reflink into a hole.
4631 */
4632 ASSERT(!(flags & XFS_BMAPI_REMAP));
4633 if (isnullstartblock(bma.got.br_startblock))
4634 wasdelay = true;
4635 }
4616 4636
4617 /* 4637 /*
4618 * First, deal with the hole before the allocated space 4638 * First, deal with the hole before the allocated space
4619 * that we found, if any. 4639 * that we found, if any.
4620 */ 4640 */
4621 if (inhole || wasdelay) { 4641 if (need_alloc || wasdelay) {
4622 bma.eof = eof; 4642 bma.eof = eof;
4623 bma.conv = !!(flags & XFS_BMAPI_CONVERT); 4643 bma.conv = !!(flags & XFS_BMAPI_CONVERT);
4624 bma.wasdel = wasdelay; 4644 bma.wasdel = wasdelay;
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index cecd094404cc..cdef87db5262 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -110,6 +110,9 @@ struct xfs_extent_free_item
110/* Map something in the CoW fork. */ 110/* Map something in the CoW fork. */
111#define XFS_BMAPI_COWFORK 0x200 111#define XFS_BMAPI_COWFORK 0x200
112 112
113/* Only convert delalloc space, don't allocate entirely new extents */
114#define XFS_BMAPI_DELALLOC 0x400
115
113#define XFS_BMAPI_FLAGS \ 116#define XFS_BMAPI_FLAGS \
114 { XFS_BMAPI_ENTIRE, "ENTIRE" }, \ 117 { XFS_BMAPI_ENTIRE, "ENTIRE" }, \
115 { XFS_BMAPI_METADATA, "METADATA" }, \ 118 { XFS_BMAPI_METADATA, "METADATA" }, \
@@ -120,7 +123,8 @@ struct xfs_extent_free_item
120 { XFS_BMAPI_CONVERT, "CONVERT" }, \ 123 { XFS_BMAPI_CONVERT, "CONVERT" }, \
121 { XFS_BMAPI_ZERO, "ZERO" }, \ 124 { XFS_BMAPI_ZERO, "ZERO" }, \
122 { XFS_BMAPI_REMAP, "REMAP" }, \ 125 { XFS_BMAPI_REMAP, "REMAP" }, \
123 { XFS_BMAPI_COWFORK, "COWFORK" } 126 { XFS_BMAPI_COWFORK, "COWFORK" }, \
127 { XFS_BMAPI_DELALLOC, "DELALLOC" }
124 128
125 129
126static inline int xfs_bmapi_aflag(int w) 130static inline int xfs_bmapi_aflag(int w)
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c
index 0fd086d03d41..7c471881c9a6 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.c
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
@@ -82,11 +82,12 @@ xfs_finobt_set_root(
82} 82}
83 83
84STATIC int 84STATIC int
85xfs_inobt_alloc_block( 85__xfs_inobt_alloc_block(
86 struct xfs_btree_cur *cur, 86 struct xfs_btree_cur *cur,
87 union xfs_btree_ptr *start, 87 union xfs_btree_ptr *start,
88 union xfs_btree_ptr *new, 88 union xfs_btree_ptr *new,
89 int *stat) 89 int *stat,
90 enum xfs_ag_resv_type resv)
90{ 91{
91 xfs_alloc_arg_t args; /* block allocation args */ 92 xfs_alloc_arg_t args; /* block allocation args */
92 int error; /* error return value */ 93 int error; /* error return value */
@@ -103,6 +104,7 @@ xfs_inobt_alloc_block(
103 args.maxlen = 1; 104 args.maxlen = 1;
104 args.prod = 1; 105 args.prod = 1;
105 args.type = XFS_ALLOCTYPE_NEAR_BNO; 106 args.type = XFS_ALLOCTYPE_NEAR_BNO;
107 args.resv = resv;
106 108
107 error = xfs_alloc_vextent(&args); 109 error = xfs_alloc_vextent(&args);
108 if (error) { 110 if (error) {
@@ -123,6 +125,27 @@ xfs_inobt_alloc_block(
123} 125}
124 126
125STATIC int 127STATIC int
128xfs_inobt_alloc_block(
129 struct xfs_btree_cur *cur,
130 union xfs_btree_ptr *start,
131 union xfs_btree_ptr *new,
132 int *stat)
133{
134 return __xfs_inobt_alloc_block(cur, start, new, stat, XFS_AG_RESV_NONE);
135}
136
137STATIC int
138xfs_finobt_alloc_block(
139 struct xfs_btree_cur *cur,
140 union xfs_btree_ptr *start,
141 union xfs_btree_ptr *new,
142 int *stat)
143{
144 return __xfs_inobt_alloc_block(cur, start, new, stat,
145 XFS_AG_RESV_METADATA);
146}
147
148STATIC int
126xfs_inobt_free_block( 149xfs_inobt_free_block(
127 struct xfs_btree_cur *cur, 150 struct xfs_btree_cur *cur,
128 struct xfs_buf *bp) 151 struct xfs_buf *bp)
@@ -328,7 +351,7 @@ static const struct xfs_btree_ops xfs_finobt_ops = {
328 351
329 .dup_cursor = xfs_inobt_dup_cursor, 352 .dup_cursor = xfs_inobt_dup_cursor,
330 .set_root = xfs_finobt_set_root, 353 .set_root = xfs_finobt_set_root,
331 .alloc_block = xfs_inobt_alloc_block, 354 .alloc_block = xfs_finobt_alloc_block,
332 .free_block = xfs_inobt_free_block, 355 .free_block = xfs_inobt_free_block,
333 .get_minrecs = xfs_inobt_get_minrecs, 356 .get_minrecs = xfs_inobt_get_minrecs,
334 .get_maxrecs = xfs_inobt_get_maxrecs, 357 .get_maxrecs = xfs_inobt_get_maxrecs,
@@ -480,3 +503,64 @@ xfs_inobt_rec_check_count(
480 return 0; 503 return 0;
481} 504}
482#endif /* DEBUG */ 505#endif /* DEBUG */
506
507static xfs_extlen_t
508xfs_inobt_max_size(
509 struct xfs_mount *mp)
510{
511 /* Bail out if we're uninitialized, which can happen in mkfs. */
512 if (mp->m_inobt_mxr[0] == 0)
513 return 0;
514
515 return xfs_btree_calc_size(mp, mp->m_inobt_mnr,
516 (uint64_t)mp->m_sb.sb_agblocks * mp->m_sb.sb_inopblock /
517 XFS_INODES_PER_CHUNK);
518}
519
520static int
521xfs_inobt_count_blocks(
522 struct xfs_mount *mp,
523 xfs_agnumber_t agno,
524 xfs_btnum_t btnum,
525 xfs_extlen_t *tree_blocks)
526{
527 struct xfs_buf *agbp;
528 struct xfs_btree_cur *cur;
529 int error;
530
531 error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp);
532 if (error)
533 return error;
534
535 cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno, btnum);
536 error = xfs_btree_count_blocks(cur, tree_blocks);
537 xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
538 xfs_buf_relse(agbp);
539
540 return error;
541}
542
543/*
544 * Figure out how many blocks to reserve and how many are used by this btree.
545 */
546int
547xfs_finobt_calc_reserves(
548 struct xfs_mount *mp,
549 xfs_agnumber_t agno,
550 xfs_extlen_t *ask,
551 xfs_extlen_t *used)
552{
553 xfs_extlen_t tree_len = 0;
554 int error;
555
556 if (!xfs_sb_version_hasfinobt(&mp->m_sb))
557 return 0;
558
559 error = xfs_inobt_count_blocks(mp, agno, XFS_BTNUM_FINO, &tree_len);
560 if (error)
561 return error;
562
563 *ask += xfs_inobt_max_size(mp);
564 *used += tree_len;
565 return 0;
566}
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.h b/fs/xfs/libxfs/xfs_ialloc_btree.h
index bd88453217ce..aa81e2e63f3f 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.h
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.h
@@ -72,4 +72,7 @@ int xfs_inobt_rec_check_count(struct xfs_mount *,
72#define xfs_inobt_rec_check_count(mp, rec) 0 72#define xfs_inobt_rec_check_count(mp, rec) 0
73#endif /* DEBUG */ 73#endif /* DEBUG */
74 74
75int xfs_finobt_calc_reserves(struct xfs_mount *mp, xfs_agnumber_t agno,
76 xfs_extlen_t *ask, xfs_extlen_t *used);
77
75#endif /* __XFS_IALLOC_BTREE_H__ */ 78#endif /* __XFS_IALLOC_BTREE_H__ */
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index 2580262e4ea0..584ec896a533 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -242,7 +242,7 @@ xfs_mount_validate_sb(
242 sbp->sb_blocklog < XFS_MIN_BLOCKSIZE_LOG || 242 sbp->sb_blocklog < XFS_MIN_BLOCKSIZE_LOG ||
243 sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG || 243 sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG ||
244 sbp->sb_blocksize != (1 << sbp->sb_blocklog) || 244 sbp->sb_blocksize != (1 << sbp->sb_blocklog) ||
245 sbp->sb_dirblklog > XFS_MAX_BLOCKSIZE_LOG || 245 sbp->sb_dirblklog + sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG ||
246 sbp->sb_inodesize < XFS_DINODE_MIN_SIZE || 246 sbp->sb_inodesize < XFS_DINODE_MIN_SIZE ||
247 sbp->sb_inodesize > XFS_DINODE_MAX_SIZE || 247 sbp->sb_inodesize > XFS_DINODE_MAX_SIZE ||
248 sbp->sb_inodelog < XFS_DINODE_MIN_LOG || 248 sbp->sb_inodelog < XFS_DINODE_MIN_LOG ||
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index b9abce524c33..c1417919ab0a 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -528,7 +528,6 @@ xfs_getbmap(
528 xfs_bmbt_irec_t *map; /* buffer for user's data */ 528 xfs_bmbt_irec_t *map; /* buffer for user's data */
529 xfs_mount_t *mp; /* file system mount point */ 529 xfs_mount_t *mp; /* file system mount point */
530 int nex; /* # of user extents can do */ 530 int nex; /* # of user extents can do */
531 int nexleft; /* # of user extents left */
532 int subnex; /* # of bmapi's can do */ 531 int subnex; /* # of bmapi's can do */
533 int nmap; /* number of map entries */ 532 int nmap; /* number of map entries */
534 struct getbmapx *out; /* output structure */ 533 struct getbmapx *out; /* output structure */
@@ -686,10 +685,8 @@ xfs_getbmap(
686 goto out_free_map; 685 goto out_free_map;
687 } 686 }
688 687
689 nexleft = nex;
690
691 do { 688 do {
692 nmap = (nexleft > subnex) ? subnex : nexleft; 689 nmap = (nex> subnex) ? subnex : nex;
693 error = xfs_bmapi_read(ip, XFS_BB_TO_FSBT(mp, bmv->bmv_offset), 690 error = xfs_bmapi_read(ip, XFS_BB_TO_FSBT(mp, bmv->bmv_offset),
694 XFS_BB_TO_FSB(mp, bmv->bmv_length), 691 XFS_BB_TO_FSB(mp, bmv->bmv_length),
695 map, &nmap, bmapi_flags); 692 map, &nmap, bmapi_flags);
@@ -697,8 +694,8 @@ xfs_getbmap(
697 goto out_free_map; 694 goto out_free_map;
698 ASSERT(nmap <= subnex); 695 ASSERT(nmap <= subnex);
699 696
700 for (i = 0; i < nmap && nexleft && bmv->bmv_length && 697 for (i = 0; i < nmap && bmv->bmv_length &&
701 cur_ext < bmv->bmv_count; i++) { 698 cur_ext < bmv->bmv_count - 1; i++) {
702 out[cur_ext].bmv_oflags = 0; 699 out[cur_ext].bmv_oflags = 0;
703 if (map[i].br_state == XFS_EXT_UNWRITTEN) 700 if (map[i].br_state == XFS_EXT_UNWRITTEN)
704 out[cur_ext].bmv_oflags |= BMV_OF_PREALLOC; 701 out[cur_ext].bmv_oflags |= BMV_OF_PREALLOC;
@@ -760,16 +757,27 @@ xfs_getbmap(
760 continue; 757 continue;
761 } 758 }
762 759
760 /*
761 * In order to report shared extents accurately,
762 * we report each distinct shared/unshared part
763 * of a single bmbt record using multiple bmap
764 * extents. To make that happen, we iterate the
765 * same map array item multiple times, each
766 * time trimming out the subextent that we just
767 * reported.
768 *
769 * Because of this, we must check the out array
770 * index (cur_ext) directly against bmv_count-1
771 * to avoid overflows.
772 */
763 if (inject_map.br_startblock != NULLFSBLOCK) { 773 if (inject_map.br_startblock != NULLFSBLOCK) {
764 map[i] = inject_map; 774 map[i] = inject_map;
765 i--; 775 i--;
766 } else 776 }
767 nexleft--;
768 bmv->bmv_entries++; 777 bmv->bmv_entries++;
769 cur_ext++; 778 cur_ext++;
770 } 779 }
771 } while (nmap && nexleft && bmv->bmv_length && 780 } while (nmap && bmv->bmv_length && cur_ext < bmv->bmv_count - 1);
772 cur_ext < bmv->bmv_count);
773 781
774 out_free_map: 782 out_free_map:
775 kmem_free(map); 783 kmem_free(map);
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 7f0a01f7b592..ac3b4db519df 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -422,6 +422,7 @@ retry:
422out_free_pages: 422out_free_pages:
423 for (i = 0; i < bp->b_page_count; i++) 423 for (i = 0; i < bp->b_page_count; i++)
424 __free_page(bp->b_pages[i]); 424 __free_page(bp->b_pages[i]);
425 bp->b_flags &= ~_XBF_PAGES;
425 return error; 426 return error;
426} 427}
427 428
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index b9557795eb74..de32f0fe47c8 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1792,22 +1792,23 @@ xfs_inactive_ifree(
1792 int error; 1792 int error;
1793 1793
1794 /* 1794 /*
1795 * The ifree transaction might need to allocate blocks for record 1795 * We try to use a per-AG reservation for any block needed by the finobt
1796 * insertion to the finobt. We don't want to fail here at ENOSPC, so 1796 * tree, but as the finobt feature predates the per-AG reservation
1797 * allow ifree to dip into the reserved block pool if necessary. 1797 * support a degraded file system might not have enough space for the
1798 * 1798 * reservation at mount time. In that case try to dip into the reserved
1799 * Freeing large sets of inodes generally means freeing inode chunks, 1799 * pool and pray.
1800 * directory and file data blocks, so this should be relatively safe.
1801 * Only under severe circumstances should it be possible to free enough
1802 * inodes to exhaust the reserve block pool via finobt expansion while
1803 * at the same time not creating free space in the filesystem.
1804 * 1800 *
1805 * Send a warning if the reservation does happen to fail, as the inode 1801 * Send a warning if the reservation does happen to fail, as the inode
1806 * now remains allocated and sits on the unlinked list until the fs is 1802 * now remains allocated and sits on the unlinked list until the fs is
1807 * repaired. 1803 * repaired.
1808 */ 1804 */
1809 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ifree, 1805 if (unlikely(mp->m_inotbt_nores)) {
1810 XFS_IFREE_SPACE_RES(mp), 0, XFS_TRANS_RESERVE, &tp); 1806 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ifree,
1807 XFS_IFREE_SPACE_RES(mp), 0, XFS_TRANS_RESERVE,
1808 &tp);
1809 } else {
1810 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ifree, 0, 0, 0, &tp);
1811 }
1811 if (error) { 1812 if (error) {
1812 if (error == -ENOSPC) { 1813 if (error == -ENOSPC) {
1813 xfs_warn_ratelimited(mp, 1814 xfs_warn_ratelimited(mp,
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 0d147428971e..1aa3abd67b36 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -681,7 +681,7 @@ xfs_iomap_write_allocate(
681 xfs_trans_t *tp; 681 xfs_trans_t *tp;
682 int nimaps; 682 int nimaps;
683 int error = 0; 683 int error = 0;
684 int flags = 0; 684 int flags = XFS_BMAPI_DELALLOC;
685 int nres; 685 int nres;
686 686
687 if (whichfork == XFS_COW_FORK) 687 if (whichfork == XFS_COW_FORK)
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 84f785218907..7f351f706b7a 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -140,6 +140,7 @@ typedef struct xfs_mount {
140 int m_fixedfsid[2]; /* unchanged for life of FS */ 140 int m_fixedfsid[2]; /* unchanged for life of FS */
141 uint m_dmevmask; /* DMI events for this FS */ 141 uint m_dmevmask; /* DMI events for this FS */
142 __uint64_t m_flags; /* global mount flags */ 142 __uint64_t m_flags; /* global mount flags */
143 bool m_inotbt_nores; /* no per-AG finobt resv. */
143 int m_ialloc_inos; /* inodes in inode allocation */ 144 int m_ialloc_inos; /* inodes in inode allocation */
144 int m_ialloc_blks; /* blocks in inode allocation */ 145 int m_ialloc_blks; /* blocks in inode allocation */
145 int m_ialloc_min_blks;/* min blocks in sparse inode 146 int m_ialloc_min_blks;/* min blocks in sparse inode
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 45e50ea90769..b669b123287b 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -1177,7 +1177,8 @@ xfs_qm_dqusage_adjust(
1177 * the case in all other instances. It's OK that we do this because 1177 * the case in all other instances. It's OK that we do this because
1178 * quotacheck is done only at mount time. 1178 * quotacheck is done only at mount time.
1179 */ 1179 */
1180 error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip); 1180 error = xfs_iget(mp, NULL, ino, XFS_IGET_DONTCACHE, XFS_ILOCK_EXCL,
1181 &ip);
1181 if (error) { 1182 if (error) {
1182 *res = BULKSTAT_RV_NOTHING; 1183 *res = BULKSTAT_RV_NOTHING;
1183 return error; 1184 return error;