diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-01-27 15:44:32 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-01-27 15:44:32 -0500 |
commit | 3365135d43f861003555c963b309672d053a2228 (patch) | |
tree | bfef4adec5da118bf1b3df7e5cff74f45af9e02d | |
parent | 5906374446386fd16fe562b042429d905d231ec3 (diff) | |
parent | e0d76fa4475ef2cf4b52d18588b8ce95153d021b (diff) |
Merge tag 'xfs-for-linus-4.10-rc6-5' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux
Pull xfs updates from Darrick Wong:
"I have some more fixes this week: better input validation, corruption
avoidance, build fixes, memory leak fixes, and a couple from Christoph
to avoid an ENOSPC failure.
Summary:
- Fix race conditions in the CoW code
- Fix some incorrect input validation checks
- Avoid crashing fs by running out of space when freeing inodes
- Fix toctou race wrt whether or not an inode has an attr
- Fix build error on arm
- Fix page refcount corruption when readahead fails
- Don't corrupt userspace in the bmap ioctl"
* tag 'xfs-for-linus-4.10-rc6-5' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
xfs: prevent quotacheck from overloading inode lru
xfs: fix bmv_count confusion w/ shared extents
xfs: clear _XBF_PAGES from buffers when readahead page
xfs: extsize hints are not unlikely in xfs_bmap_btalloc
xfs: remove racy hasattr check from attr ops
xfs: use per-AG reservations for the finobt
xfs: only update mount/resv fields on success in __xfs_ag_resv_init
xfs: verify dirblocklog correctly
xfs: fix COW writeback race
-rw-r--r-- | fs/xfs/libxfs/xfs_ag_resv.c | 70 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_attr.c | 6 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_bmap.c | 48 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_bmap.h | 6 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_ialloc_btree.c | 90 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_ialloc_btree.h | 3 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_sb.c | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_bmap_util.c | 28 | ||||
-rw-r--r-- | fs/xfs/xfs_buf.c | 1 | ||||
-rw-r--r-- | fs/xfs/xfs_inode.c | 23 | ||||
-rw-r--r-- | fs/xfs/xfs_iomap.c | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_mount.h | 1 | ||||
-rw-r--r-- | fs/xfs/xfs_qm.c | 3 |
13 files changed, 220 insertions, 63 deletions
diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c index d346d42c54d1..33db69be4832 100644 --- a/fs/xfs/libxfs/xfs_ag_resv.c +++ b/fs/xfs/libxfs/xfs_ag_resv.c | |||
@@ -39,6 +39,7 @@ | |||
39 | #include "xfs_rmap_btree.h" | 39 | #include "xfs_rmap_btree.h" |
40 | #include "xfs_btree.h" | 40 | #include "xfs_btree.h" |
41 | #include "xfs_refcount_btree.h" | 41 | #include "xfs_refcount_btree.h" |
42 | #include "xfs_ialloc_btree.h" | ||
42 | 43 | ||
43 | /* | 44 | /* |
44 | * Per-AG Block Reservations | 45 | * Per-AG Block Reservations |
@@ -200,22 +201,30 @@ __xfs_ag_resv_init( | |||
200 | struct xfs_mount *mp = pag->pag_mount; | 201 | struct xfs_mount *mp = pag->pag_mount; |
201 | struct xfs_ag_resv *resv; | 202 | struct xfs_ag_resv *resv; |
202 | int error; | 203 | int error; |
204 | xfs_extlen_t reserved; | ||
203 | 205 | ||
204 | resv = xfs_perag_resv(pag, type); | ||
205 | if (used > ask) | 206 | if (used > ask) |
206 | ask = used; | 207 | ask = used; |
207 | resv->ar_asked = ask; | 208 | reserved = ask - used; |
208 | resv->ar_reserved = resv->ar_orig_reserved = ask - used; | ||
209 | mp->m_ag_max_usable -= ask; | ||
210 | 209 | ||
211 | trace_xfs_ag_resv_init(pag, type, ask); | 210 | error = xfs_mod_fdblocks(mp, -(int64_t)reserved, true); |
212 | 211 | if (error) { | |
213 | error = xfs_mod_fdblocks(mp, -(int64_t)resv->ar_reserved, true); | ||
214 | if (error) | ||
215 | trace_xfs_ag_resv_init_error(pag->pag_mount, pag->pag_agno, | 212 | trace_xfs_ag_resv_init_error(pag->pag_mount, pag->pag_agno, |
216 | error, _RET_IP_); | 213 | error, _RET_IP_); |
214 | xfs_warn(mp, | ||
215 | "Per-AG reservation for AG %u failed. Filesystem may run out of space.", | ||
216 | pag->pag_agno); | ||
217 | return error; | ||
218 | } | ||
217 | 219 | ||
218 | return error; | 220 | mp->m_ag_max_usable -= ask; |
221 | |||
222 | resv = xfs_perag_resv(pag, type); | ||
223 | resv->ar_asked = ask; | ||
224 | resv->ar_reserved = resv->ar_orig_reserved = reserved; | ||
225 | |||
226 | trace_xfs_ag_resv_init(pag, type, ask); | ||
227 | return 0; | ||
219 | } | 228 | } |
220 | 229 | ||
221 | /* Create a per-AG block reservation. */ | 230 | /* Create a per-AG block reservation. */ |
@@ -223,6 +232,8 @@ int | |||
223 | xfs_ag_resv_init( | 232 | xfs_ag_resv_init( |
224 | struct xfs_perag *pag) | 233 | struct xfs_perag *pag) |
225 | { | 234 | { |
235 | struct xfs_mount *mp = pag->pag_mount; | ||
236 | xfs_agnumber_t agno = pag->pag_agno; | ||
226 | xfs_extlen_t ask; | 237 | xfs_extlen_t ask; |
227 | xfs_extlen_t used; | 238 | xfs_extlen_t used; |
228 | int error = 0; | 239 | int error = 0; |
@@ -231,23 +242,45 @@ xfs_ag_resv_init( | |||
231 | if (pag->pag_meta_resv.ar_asked == 0) { | 242 | if (pag->pag_meta_resv.ar_asked == 0) { |
232 | ask = used = 0; | 243 | ask = used = 0; |
233 | 244 | ||
234 | error = xfs_refcountbt_calc_reserves(pag->pag_mount, | 245 | error = xfs_refcountbt_calc_reserves(mp, agno, &ask, &used); |
235 | pag->pag_agno, &ask, &used); | ||
236 | if (error) | 246 | if (error) |
237 | goto out; | 247 | goto out; |
238 | 248 | ||
239 | error = __xfs_ag_resv_init(pag, XFS_AG_RESV_METADATA, | 249 | error = xfs_finobt_calc_reserves(mp, agno, &ask, &used); |
240 | ask, used); | ||
241 | if (error) | 250 | if (error) |
242 | goto out; | 251 | goto out; |
252 | |||
253 | error = __xfs_ag_resv_init(pag, XFS_AG_RESV_METADATA, | ||
254 | ask, used); | ||
255 | if (error) { | ||
256 | /* | ||
257 | * Because we didn't have per-AG reservations when the | ||
258 | * finobt feature was added we might not be able to | ||
259 | * reserve all needed blocks. Warn and fall back to the | ||
260 | * old and potentially buggy code in that case, but | ||
261 | * ensure we do have the reservation for the refcountbt. | ||
262 | */ | ||
263 | ask = used = 0; | ||
264 | |||
265 | mp->m_inotbt_nores = true; | ||
266 | |||
267 | error = xfs_refcountbt_calc_reserves(mp, agno, &ask, | ||
268 | &used); | ||
269 | if (error) | ||
270 | goto out; | ||
271 | |||
272 | error = __xfs_ag_resv_init(pag, XFS_AG_RESV_METADATA, | ||
273 | ask, used); | ||
274 | if (error) | ||
275 | goto out; | ||
276 | } | ||
243 | } | 277 | } |
244 | 278 | ||
245 | /* Create the AGFL metadata reservation */ | 279 | /* Create the AGFL metadata reservation */ |
246 | if (pag->pag_agfl_resv.ar_asked == 0) { | 280 | if (pag->pag_agfl_resv.ar_asked == 0) { |
247 | ask = used = 0; | 281 | ask = used = 0; |
248 | 282 | ||
249 | error = xfs_rmapbt_calc_reserves(pag->pag_mount, pag->pag_agno, | 283 | error = xfs_rmapbt_calc_reserves(mp, agno, &ask, &used); |
250 | &ask, &used); | ||
251 | if (error) | 284 | if (error) |
252 | goto out; | 285 | goto out; |
253 | 286 | ||
@@ -256,9 +289,16 @@ xfs_ag_resv_init( | |||
256 | goto out; | 289 | goto out; |
257 | } | 290 | } |
258 | 291 | ||
292 | #ifdef DEBUG | ||
293 | /* need to read in the AGF for the ASSERT below to work */ | ||
294 | error = xfs_alloc_pagf_init(pag->pag_mount, NULL, pag->pag_agno, 0); | ||
295 | if (error) | ||
296 | return error; | ||
297 | |||
259 | ASSERT(xfs_perag_resv(pag, XFS_AG_RESV_METADATA)->ar_reserved + | 298 | ASSERT(xfs_perag_resv(pag, XFS_AG_RESV_METADATA)->ar_reserved + |
260 | xfs_perag_resv(pag, XFS_AG_RESV_AGFL)->ar_reserved <= | 299 | xfs_perag_resv(pag, XFS_AG_RESV_AGFL)->ar_reserved <= |
261 | pag->pagf_freeblks + pag->pagf_flcount); | 300 | pag->pagf_freeblks + pag->pagf_flcount); |
301 | #endif | ||
262 | out: | 302 | out: |
263 | return error; | 303 | return error; |
264 | } | 304 | } |
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index af1ecb19121e..6622d46ddec3 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c | |||
@@ -131,9 +131,6 @@ xfs_attr_get( | |||
131 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) | 131 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) |
132 | return -EIO; | 132 | return -EIO; |
133 | 133 | ||
134 | if (!xfs_inode_hasattr(ip)) | ||
135 | return -ENOATTR; | ||
136 | |||
137 | error = xfs_attr_args_init(&args, ip, name, flags); | 134 | error = xfs_attr_args_init(&args, ip, name, flags); |
138 | if (error) | 135 | if (error) |
139 | return error; | 136 | return error; |
@@ -392,9 +389,6 @@ xfs_attr_remove( | |||
392 | if (XFS_FORCED_SHUTDOWN(dp->i_mount)) | 389 | if (XFS_FORCED_SHUTDOWN(dp->i_mount)) |
393 | return -EIO; | 390 | return -EIO; |
394 | 391 | ||
395 | if (!xfs_inode_hasattr(dp)) | ||
396 | return -ENOATTR; | ||
397 | |||
398 | error = xfs_attr_args_init(&args, dp, name, flags); | 392 | error = xfs_attr_args_init(&args, dp, name, flags); |
399 | if (error) | 393 | if (error) |
400 | return error; | 394 | return error; |
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 44773c9eb957..bfc00de5c6f1 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c | |||
@@ -3629,7 +3629,7 @@ xfs_bmap_btalloc( | |||
3629 | align = xfs_get_cowextsz_hint(ap->ip); | 3629 | align = xfs_get_cowextsz_hint(ap->ip); |
3630 | else if (xfs_alloc_is_userdata(ap->datatype)) | 3630 | else if (xfs_alloc_is_userdata(ap->datatype)) |
3631 | align = xfs_get_extsz_hint(ap->ip); | 3631 | align = xfs_get_extsz_hint(ap->ip); |
3632 | if (unlikely(align)) { | 3632 | if (align) { |
3633 | error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, | 3633 | error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, |
3634 | align, 0, ap->eof, 0, ap->conv, | 3634 | align, 0, ap->eof, 0, ap->conv, |
3635 | &ap->offset, &ap->length); | 3635 | &ap->offset, &ap->length); |
@@ -3701,7 +3701,7 @@ xfs_bmap_btalloc( | |||
3701 | args.minlen = ap->minlen; | 3701 | args.minlen = ap->minlen; |
3702 | } | 3702 | } |
3703 | /* apply extent size hints if obtained earlier */ | 3703 | /* apply extent size hints if obtained earlier */ |
3704 | if (unlikely(align)) { | 3704 | if (align) { |
3705 | args.prod = align; | 3705 | args.prod = align; |
3706 | if ((args.mod = (xfs_extlen_t)do_mod(ap->offset, args.prod))) | 3706 | if ((args.mod = (xfs_extlen_t)do_mod(ap->offset, args.prod))) |
3707 | args.mod = (xfs_extlen_t)(args.prod - args.mod); | 3707 | args.mod = (xfs_extlen_t)(args.prod - args.mod); |
@@ -4514,8 +4514,6 @@ xfs_bmapi_write( | |||
4514 | int n; /* current extent index */ | 4514 | int n; /* current extent index */ |
4515 | xfs_fileoff_t obno; /* old block number (offset) */ | 4515 | xfs_fileoff_t obno; /* old block number (offset) */ |
4516 | int whichfork; /* data or attr fork */ | 4516 | int whichfork; /* data or attr fork */ |
4517 | char inhole; /* current location is hole in file */ | ||
4518 | char wasdelay; /* old extent was delayed */ | ||
4519 | 4517 | ||
4520 | #ifdef DEBUG | 4518 | #ifdef DEBUG |
4521 | xfs_fileoff_t orig_bno; /* original block number value */ | 4519 | xfs_fileoff_t orig_bno; /* original block number value */ |
@@ -4603,22 +4601,44 @@ xfs_bmapi_write( | |||
4603 | bma.firstblock = firstblock; | 4601 | bma.firstblock = firstblock; |
4604 | 4602 | ||
4605 | while (bno < end && n < *nmap) { | 4603 | while (bno < end && n < *nmap) { |
4606 | inhole = eof || bma.got.br_startoff > bno; | 4604 | bool need_alloc = false, wasdelay = false; |
4607 | wasdelay = !inhole && isnullstartblock(bma.got.br_startblock); | ||
4608 | 4605 | ||
4609 | /* | 4606 | /* in hole or beyond EOF? */ |
4610 | * Make sure we only reflink into a hole. | 4607 | if (eof || bma.got.br_startoff > bno) { |
4611 | */ | 4608 | if (flags & XFS_BMAPI_DELALLOC) { |
4612 | if (flags & XFS_BMAPI_REMAP) | 4609 | /* |
4613 | ASSERT(inhole); | 4610 | * For the COW fork we can reasonably get a |
4614 | if (flags & XFS_BMAPI_COWFORK) | 4611 | * request for converting an extent that races |
4615 | ASSERT(!inhole); | 4612 | * with other threads already having converted |
4613 | * part of it, as there converting COW to | ||
4614 | * regular blocks is not protected using the | ||
4615 | * IOLOCK. | ||
4616 | */ | ||
4617 | ASSERT(flags & XFS_BMAPI_COWFORK); | ||
4618 | if (!(flags & XFS_BMAPI_COWFORK)) { | ||
4619 | error = -EIO; | ||
4620 | goto error0; | ||
4621 | } | ||
4622 | |||
4623 | if (eof || bno >= end) | ||
4624 | break; | ||
4625 | } else { | ||
4626 | need_alloc = true; | ||
4627 | } | ||
4628 | } else { | ||
4629 | /* | ||
4630 | * Make sure we only reflink into a hole. | ||
4631 | */ | ||
4632 | ASSERT(!(flags & XFS_BMAPI_REMAP)); | ||
4633 | if (isnullstartblock(bma.got.br_startblock)) | ||
4634 | wasdelay = true; | ||
4635 | } | ||
4616 | 4636 | ||
4617 | /* | 4637 | /* |
4618 | * First, deal with the hole before the allocated space | 4638 | * First, deal with the hole before the allocated space |
4619 | * that we found, if any. | 4639 | * that we found, if any. |
4620 | */ | 4640 | */ |
4621 | if (inhole || wasdelay) { | 4641 | if (need_alloc || wasdelay) { |
4622 | bma.eof = eof; | 4642 | bma.eof = eof; |
4623 | bma.conv = !!(flags & XFS_BMAPI_CONVERT); | 4643 | bma.conv = !!(flags & XFS_BMAPI_CONVERT); |
4624 | bma.wasdel = wasdelay; | 4644 | bma.wasdel = wasdelay; |
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index cecd094404cc..cdef87db5262 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h | |||
@@ -110,6 +110,9 @@ struct xfs_extent_free_item | |||
110 | /* Map something in the CoW fork. */ | 110 | /* Map something in the CoW fork. */ |
111 | #define XFS_BMAPI_COWFORK 0x200 | 111 | #define XFS_BMAPI_COWFORK 0x200 |
112 | 112 | ||
113 | /* Only convert delalloc space, don't allocate entirely new extents */ | ||
114 | #define XFS_BMAPI_DELALLOC 0x400 | ||
115 | |||
113 | #define XFS_BMAPI_FLAGS \ | 116 | #define XFS_BMAPI_FLAGS \ |
114 | { XFS_BMAPI_ENTIRE, "ENTIRE" }, \ | 117 | { XFS_BMAPI_ENTIRE, "ENTIRE" }, \ |
115 | { XFS_BMAPI_METADATA, "METADATA" }, \ | 118 | { XFS_BMAPI_METADATA, "METADATA" }, \ |
@@ -120,7 +123,8 @@ struct xfs_extent_free_item | |||
120 | { XFS_BMAPI_CONVERT, "CONVERT" }, \ | 123 | { XFS_BMAPI_CONVERT, "CONVERT" }, \ |
121 | { XFS_BMAPI_ZERO, "ZERO" }, \ | 124 | { XFS_BMAPI_ZERO, "ZERO" }, \ |
122 | { XFS_BMAPI_REMAP, "REMAP" }, \ | 125 | { XFS_BMAPI_REMAP, "REMAP" }, \ |
123 | { XFS_BMAPI_COWFORK, "COWFORK" } | 126 | { XFS_BMAPI_COWFORK, "COWFORK" }, \ |
127 | { XFS_BMAPI_DELALLOC, "DELALLOC" } | ||
124 | 128 | ||
125 | 129 | ||
126 | static inline int xfs_bmapi_aflag(int w) | 130 | static inline int xfs_bmapi_aflag(int w) |
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c index 0fd086d03d41..7c471881c9a6 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.c +++ b/fs/xfs/libxfs/xfs_ialloc_btree.c | |||
@@ -82,11 +82,12 @@ xfs_finobt_set_root( | |||
82 | } | 82 | } |
83 | 83 | ||
84 | STATIC int | 84 | STATIC int |
85 | xfs_inobt_alloc_block( | 85 | __xfs_inobt_alloc_block( |
86 | struct xfs_btree_cur *cur, | 86 | struct xfs_btree_cur *cur, |
87 | union xfs_btree_ptr *start, | 87 | union xfs_btree_ptr *start, |
88 | union xfs_btree_ptr *new, | 88 | union xfs_btree_ptr *new, |
89 | int *stat) | 89 | int *stat, |
90 | enum xfs_ag_resv_type resv) | ||
90 | { | 91 | { |
91 | xfs_alloc_arg_t args; /* block allocation args */ | 92 | xfs_alloc_arg_t args; /* block allocation args */ |
92 | int error; /* error return value */ | 93 | int error; /* error return value */ |
@@ -103,6 +104,7 @@ xfs_inobt_alloc_block( | |||
103 | args.maxlen = 1; | 104 | args.maxlen = 1; |
104 | args.prod = 1; | 105 | args.prod = 1; |
105 | args.type = XFS_ALLOCTYPE_NEAR_BNO; | 106 | args.type = XFS_ALLOCTYPE_NEAR_BNO; |
107 | args.resv = resv; | ||
106 | 108 | ||
107 | error = xfs_alloc_vextent(&args); | 109 | error = xfs_alloc_vextent(&args); |
108 | if (error) { | 110 | if (error) { |
@@ -123,6 +125,27 @@ xfs_inobt_alloc_block( | |||
123 | } | 125 | } |
124 | 126 | ||
125 | STATIC int | 127 | STATIC int |
128 | xfs_inobt_alloc_block( | ||
129 | struct xfs_btree_cur *cur, | ||
130 | union xfs_btree_ptr *start, | ||
131 | union xfs_btree_ptr *new, | ||
132 | int *stat) | ||
133 | { | ||
134 | return __xfs_inobt_alloc_block(cur, start, new, stat, XFS_AG_RESV_NONE); | ||
135 | } | ||
136 | |||
137 | STATIC int | ||
138 | xfs_finobt_alloc_block( | ||
139 | struct xfs_btree_cur *cur, | ||
140 | union xfs_btree_ptr *start, | ||
141 | union xfs_btree_ptr *new, | ||
142 | int *stat) | ||
143 | { | ||
144 | return __xfs_inobt_alloc_block(cur, start, new, stat, | ||
145 | XFS_AG_RESV_METADATA); | ||
146 | } | ||
147 | |||
148 | STATIC int | ||
126 | xfs_inobt_free_block( | 149 | xfs_inobt_free_block( |
127 | struct xfs_btree_cur *cur, | 150 | struct xfs_btree_cur *cur, |
128 | struct xfs_buf *bp) | 151 | struct xfs_buf *bp) |
@@ -328,7 +351,7 @@ static const struct xfs_btree_ops xfs_finobt_ops = { | |||
328 | 351 | ||
329 | .dup_cursor = xfs_inobt_dup_cursor, | 352 | .dup_cursor = xfs_inobt_dup_cursor, |
330 | .set_root = xfs_finobt_set_root, | 353 | .set_root = xfs_finobt_set_root, |
331 | .alloc_block = xfs_inobt_alloc_block, | 354 | .alloc_block = xfs_finobt_alloc_block, |
332 | .free_block = xfs_inobt_free_block, | 355 | .free_block = xfs_inobt_free_block, |
333 | .get_minrecs = xfs_inobt_get_minrecs, | 356 | .get_minrecs = xfs_inobt_get_minrecs, |
334 | .get_maxrecs = xfs_inobt_get_maxrecs, | 357 | .get_maxrecs = xfs_inobt_get_maxrecs, |
@@ -480,3 +503,64 @@ xfs_inobt_rec_check_count( | |||
480 | return 0; | 503 | return 0; |
481 | } | 504 | } |
482 | #endif /* DEBUG */ | 505 | #endif /* DEBUG */ |
506 | |||
507 | static xfs_extlen_t | ||
508 | xfs_inobt_max_size( | ||
509 | struct xfs_mount *mp) | ||
510 | { | ||
511 | /* Bail out if we're uninitialized, which can happen in mkfs. */ | ||
512 | if (mp->m_inobt_mxr[0] == 0) | ||
513 | return 0; | ||
514 | |||
515 | return xfs_btree_calc_size(mp, mp->m_inobt_mnr, | ||
516 | (uint64_t)mp->m_sb.sb_agblocks * mp->m_sb.sb_inopblock / | ||
517 | XFS_INODES_PER_CHUNK); | ||
518 | } | ||
519 | |||
520 | static int | ||
521 | xfs_inobt_count_blocks( | ||
522 | struct xfs_mount *mp, | ||
523 | xfs_agnumber_t agno, | ||
524 | xfs_btnum_t btnum, | ||
525 | xfs_extlen_t *tree_blocks) | ||
526 | { | ||
527 | struct xfs_buf *agbp; | ||
528 | struct xfs_btree_cur *cur; | ||
529 | int error; | ||
530 | |||
531 | error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp); | ||
532 | if (error) | ||
533 | return error; | ||
534 | |||
535 | cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno, btnum); | ||
536 | error = xfs_btree_count_blocks(cur, tree_blocks); | ||
537 | xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); | ||
538 | xfs_buf_relse(agbp); | ||
539 | |||
540 | return error; | ||
541 | } | ||
542 | |||
543 | /* | ||
544 | * Figure out how many blocks to reserve and how many are used by this btree. | ||
545 | */ | ||
546 | int | ||
547 | xfs_finobt_calc_reserves( | ||
548 | struct xfs_mount *mp, | ||
549 | xfs_agnumber_t agno, | ||
550 | xfs_extlen_t *ask, | ||
551 | xfs_extlen_t *used) | ||
552 | { | ||
553 | xfs_extlen_t tree_len = 0; | ||
554 | int error; | ||
555 | |||
556 | if (!xfs_sb_version_hasfinobt(&mp->m_sb)) | ||
557 | return 0; | ||
558 | |||
559 | error = xfs_inobt_count_blocks(mp, agno, XFS_BTNUM_FINO, &tree_len); | ||
560 | if (error) | ||
561 | return error; | ||
562 | |||
563 | *ask += xfs_inobt_max_size(mp); | ||
564 | *used += tree_len; | ||
565 | return 0; | ||
566 | } | ||
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.h b/fs/xfs/libxfs/xfs_ialloc_btree.h index bd88453217ce..aa81e2e63f3f 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.h +++ b/fs/xfs/libxfs/xfs_ialloc_btree.h | |||
@@ -72,4 +72,7 @@ int xfs_inobt_rec_check_count(struct xfs_mount *, | |||
72 | #define xfs_inobt_rec_check_count(mp, rec) 0 | 72 | #define xfs_inobt_rec_check_count(mp, rec) 0 |
73 | #endif /* DEBUG */ | 73 | #endif /* DEBUG */ |
74 | 74 | ||
75 | int xfs_finobt_calc_reserves(struct xfs_mount *mp, xfs_agnumber_t agno, | ||
76 | xfs_extlen_t *ask, xfs_extlen_t *used); | ||
77 | |||
75 | #endif /* __XFS_IALLOC_BTREE_H__ */ | 78 | #endif /* __XFS_IALLOC_BTREE_H__ */ |
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 2580262e4ea0..584ec896a533 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c | |||
@@ -242,7 +242,7 @@ xfs_mount_validate_sb( | |||
242 | sbp->sb_blocklog < XFS_MIN_BLOCKSIZE_LOG || | 242 | sbp->sb_blocklog < XFS_MIN_BLOCKSIZE_LOG || |
243 | sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG || | 243 | sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG || |
244 | sbp->sb_blocksize != (1 << sbp->sb_blocklog) || | 244 | sbp->sb_blocksize != (1 << sbp->sb_blocklog) || |
245 | sbp->sb_dirblklog > XFS_MAX_BLOCKSIZE_LOG || | 245 | sbp->sb_dirblklog + sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG || |
246 | sbp->sb_inodesize < XFS_DINODE_MIN_SIZE || | 246 | sbp->sb_inodesize < XFS_DINODE_MIN_SIZE || |
247 | sbp->sb_inodesize > XFS_DINODE_MAX_SIZE || | 247 | sbp->sb_inodesize > XFS_DINODE_MAX_SIZE || |
248 | sbp->sb_inodelog < XFS_DINODE_MIN_LOG || | 248 | sbp->sb_inodelog < XFS_DINODE_MIN_LOG || |
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index b9abce524c33..c1417919ab0a 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c | |||
@@ -528,7 +528,6 @@ xfs_getbmap( | |||
528 | xfs_bmbt_irec_t *map; /* buffer for user's data */ | 528 | xfs_bmbt_irec_t *map; /* buffer for user's data */ |
529 | xfs_mount_t *mp; /* file system mount point */ | 529 | xfs_mount_t *mp; /* file system mount point */ |
530 | int nex; /* # of user extents can do */ | 530 | int nex; /* # of user extents can do */ |
531 | int nexleft; /* # of user extents left */ | ||
532 | int subnex; /* # of bmapi's can do */ | 531 | int subnex; /* # of bmapi's can do */ |
533 | int nmap; /* number of map entries */ | 532 | int nmap; /* number of map entries */ |
534 | struct getbmapx *out; /* output structure */ | 533 | struct getbmapx *out; /* output structure */ |
@@ -686,10 +685,8 @@ xfs_getbmap( | |||
686 | goto out_free_map; | 685 | goto out_free_map; |
687 | } | 686 | } |
688 | 687 | ||
689 | nexleft = nex; | ||
690 | |||
691 | do { | 688 | do { |
692 | nmap = (nexleft > subnex) ? subnex : nexleft; | 689 | nmap = (nex> subnex) ? subnex : nex; |
693 | error = xfs_bmapi_read(ip, XFS_BB_TO_FSBT(mp, bmv->bmv_offset), | 690 | error = xfs_bmapi_read(ip, XFS_BB_TO_FSBT(mp, bmv->bmv_offset), |
694 | XFS_BB_TO_FSB(mp, bmv->bmv_length), | 691 | XFS_BB_TO_FSB(mp, bmv->bmv_length), |
695 | map, &nmap, bmapi_flags); | 692 | map, &nmap, bmapi_flags); |
@@ -697,8 +694,8 @@ xfs_getbmap( | |||
697 | goto out_free_map; | 694 | goto out_free_map; |
698 | ASSERT(nmap <= subnex); | 695 | ASSERT(nmap <= subnex); |
699 | 696 | ||
700 | for (i = 0; i < nmap && nexleft && bmv->bmv_length && | 697 | for (i = 0; i < nmap && bmv->bmv_length && |
701 | cur_ext < bmv->bmv_count; i++) { | 698 | cur_ext < bmv->bmv_count - 1; i++) { |
702 | out[cur_ext].bmv_oflags = 0; | 699 | out[cur_ext].bmv_oflags = 0; |
703 | if (map[i].br_state == XFS_EXT_UNWRITTEN) | 700 | if (map[i].br_state == XFS_EXT_UNWRITTEN) |
704 | out[cur_ext].bmv_oflags |= BMV_OF_PREALLOC; | 701 | out[cur_ext].bmv_oflags |= BMV_OF_PREALLOC; |
@@ -760,16 +757,27 @@ xfs_getbmap( | |||
760 | continue; | 757 | continue; |
761 | } | 758 | } |
762 | 759 | ||
760 | /* | ||
761 | * In order to report shared extents accurately, | ||
762 | * we report each distinct shared/unshared part | ||
763 | * of a single bmbt record using multiple bmap | ||
764 | * extents. To make that happen, we iterate the | ||
765 | * same map array item multiple times, each | ||
766 | * time trimming out the subextent that we just | ||
767 | * reported. | ||
768 | * | ||
769 | * Because of this, we must check the out array | ||
770 | * index (cur_ext) directly against bmv_count-1 | ||
771 | * to avoid overflows. | ||
772 | */ | ||
763 | if (inject_map.br_startblock != NULLFSBLOCK) { | 773 | if (inject_map.br_startblock != NULLFSBLOCK) { |
764 | map[i] = inject_map; | 774 | map[i] = inject_map; |
765 | i--; | 775 | i--; |
766 | } else | 776 | } |
767 | nexleft--; | ||
768 | bmv->bmv_entries++; | 777 | bmv->bmv_entries++; |
769 | cur_ext++; | 778 | cur_ext++; |
770 | } | 779 | } |
771 | } while (nmap && nexleft && bmv->bmv_length && | 780 | } while (nmap && bmv->bmv_length && cur_ext < bmv->bmv_count - 1); |
772 | cur_ext < bmv->bmv_count); | ||
773 | 781 | ||
774 | out_free_map: | 782 | out_free_map: |
775 | kmem_free(map); | 783 | kmem_free(map); |
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 7f0a01f7b592..ac3b4db519df 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c | |||
@@ -422,6 +422,7 @@ retry: | |||
422 | out_free_pages: | 422 | out_free_pages: |
423 | for (i = 0; i < bp->b_page_count; i++) | 423 | for (i = 0; i < bp->b_page_count; i++) |
424 | __free_page(bp->b_pages[i]); | 424 | __free_page(bp->b_pages[i]); |
425 | bp->b_flags &= ~_XBF_PAGES; | ||
425 | return error; | 426 | return error; |
426 | } | 427 | } |
427 | 428 | ||
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index b9557795eb74..de32f0fe47c8 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c | |||
@@ -1792,22 +1792,23 @@ xfs_inactive_ifree( | |||
1792 | int error; | 1792 | int error; |
1793 | 1793 | ||
1794 | /* | 1794 | /* |
1795 | * The ifree transaction might need to allocate blocks for record | 1795 | * We try to use a per-AG reservation for any block needed by the finobt |
1796 | * insertion to the finobt. We don't want to fail here at ENOSPC, so | 1796 | * tree, but as the finobt feature predates the per-AG reservation |
1797 | * allow ifree to dip into the reserved block pool if necessary. | 1797 | * support a degraded file system might not have enough space for the |
1798 | * | 1798 | * reservation at mount time. In that case try to dip into the reserved |
1799 | * Freeing large sets of inodes generally means freeing inode chunks, | 1799 | * pool and pray. |
1800 | * directory and file data blocks, so this should be relatively safe. | ||
1801 | * Only under severe circumstances should it be possible to free enough | ||
1802 | * inodes to exhaust the reserve block pool via finobt expansion while | ||
1803 | * at the same time not creating free space in the filesystem. | ||
1804 | * | 1800 | * |
1805 | * Send a warning if the reservation does happen to fail, as the inode | 1801 | * Send a warning if the reservation does happen to fail, as the inode |
1806 | * now remains allocated and sits on the unlinked list until the fs is | 1802 | * now remains allocated and sits on the unlinked list until the fs is |
1807 | * repaired. | 1803 | * repaired. |
1808 | */ | 1804 | */ |
1809 | error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ifree, | 1805 | if (unlikely(mp->m_inotbt_nores)) { |
1810 | XFS_IFREE_SPACE_RES(mp), 0, XFS_TRANS_RESERVE, &tp); | 1806 | error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ifree, |
1807 | XFS_IFREE_SPACE_RES(mp), 0, XFS_TRANS_RESERVE, | ||
1808 | &tp); | ||
1809 | } else { | ||
1810 | error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ifree, 0, 0, 0, &tp); | ||
1811 | } | ||
1811 | if (error) { | 1812 | if (error) { |
1812 | if (error == -ENOSPC) { | 1813 | if (error == -ENOSPC) { |
1813 | xfs_warn_ratelimited(mp, | 1814 | xfs_warn_ratelimited(mp, |
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 0d147428971e..1aa3abd67b36 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c | |||
@@ -681,7 +681,7 @@ xfs_iomap_write_allocate( | |||
681 | xfs_trans_t *tp; | 681 | xfs_trans_t *tp; |
682 | int nimaps; | 682 | int nimaps; |
683 | int error = 0; | 683 | int error = 0; |
684 | int flags = 0; | 684 | int flags = XFS_BMAPI_DELALLOC; |
685 | int nres; | 685 | int nres; |
686 | 686 | ||
687 | if (whichfork == XFS_COW_FORK) | 687 | if (whichfork == XFS_COW_FORK) |
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 84f785218907..7f351f706b7a 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h | |||
@@ -140,6 +140,7 @@ typedef struct xfs_mount { | |||
140 | int m_fixedfsid[2]; /* unchanged for life of FS */ | 140 | int m_fixedfsid[2]; /* unchanged for life of FS */ |
141 | uint m_dmevmask; /* DMI events for this FS */ | 141 | uint m_dmevmask; /* DMI events for this FS */ |
142 | __uint64_t m_flags; /* global mount flags */ | 142 | __uint64_t m_flags; /* global mount flags */ |
143 | bool m_inotbt_nores; /* no per-AG finobt resv. */ | ||
143 | int m_ialloc_inos; /* inodes in inode allocation */ | 144 | int m_ialloc_inos; /* inodes in inode allocation */ |
144 | int m_ialloc_blks; /* blocks in inode allocation */ | 145 | int m_ialloc_blks; /* blocks in inode allocation */ |
145 | int m_ialloc_min_blks;/* min blocks in sparse inode | 146 | int m_ialloc_min_blks;/* min blocks in sparse inode |
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 45e50ea90769..b669b123287b 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c | |||
@@ -1177,7 +1177,8 @@ xfs_qm_dqusage_adjust( | |||
1177 | * the case in all other instances. It's OK that we do this because | 1177 | * the case in all other instances. It's OK that we do this because |
1178 | * quotacheck is done only at mount time. | 1178 | * quotacheck is done only at mount time. |
1179 | */ | 1179 | */ |
1180 | error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip); | 1180 | error = xfs_iget(mp, NULL, ino, XFS_IGET_DONTCACHE, XFS_ILOCK_EXCL, |
1181 | &ip); | ||
1181 | if (error) { | 1182 | if (error) { |
1182 | *res = BULKSTAT_RV_NOTHING; | 1183 | *res = BULKSTAT_RV_NOTHING; |
1183 | return error; | 1184 | return error; |