diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-01-27 15:44:32 -0500 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-01-27 15:44:32 -0500 |
| commit | 3365135d43f861003555c963b309672d053a2228 (patch) | |
| tree | bfef4adec5da118bf1b3df7e5cff74f45af9e02d | |
| parent | 5906374446386fd16fe562b042429d905d231ec3 (diff) | |
| parent | e0d76fa4475ef2cf4b52d18588b8ce95153d021b (diff) | |
Merge tag 'xfs-for-linus-4.10-rc6-5' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux
Pull xfs updates from Darrick Wong:
"I have some more fixes this week: better input validation, corruption
avoidance, build fixes, memory leak fixes, and a couple from Christoph
to avoid an ENOSPC failure.
Summary:
- Fix race conditions in the CoW code
- Fix some incorrect input validation checks
- Avoid crashing fs by running out of space when freeing inodes
- Fix toctou race wrt whether or not an inode has an attr
- Fix build error on arm
- Fix page refcount corruption when readahead fails
- Don't corrupt userspace in the bmap ioctl"
* tag 'xfs-for-linus-4.10-rc6-5' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
xfs: prevent quotacheck from overloading inode lru
xfs: fix bmv_count confusion w/ shared extents
xfs: clear _XBF_PAGES from buffers when readahead page
xfs: extsize hints are not unlikely in xfs_bmap_btalloc
xfs: remove racy hasattr check from attr ops
xfs: use per-AG reservations for the finobt
xfs: only update mount/resv fields on success in __xfs_ag_resv_init
xfs: verify dirblocklog correctly
xfs: fix COW writeback race
| -rw-r--r-- | fs/xfs/libxfs/xfs_ag_resv.c | 70 | ||||
| -rw-r--r-- | fs/xfs/libxfs/xfs_attr.c | 6 | ||||
| -rw-r--r-- | fs/xfs/libxfs/xfs_bmap.c | 48 | ||||
| -rw-r--r-- | fs/xfs/libxfs/xfs_bmap.h | 6 | ||||
| -rw-r--r-- | fs/xfs/libxfs/xfs_ialloc_btree.c | 90 | ||||
| -rw-r--r-- | fs/xfs/libxfs/xfs_ialloc_btree.h | 3 | ||||
| -rw-r--r-- | fs/xfs/libxfs/xfs_sb.c | 2 | ||||
| -rw-r--r-- | fs/xfs/xfs_bmap_util.c | 28 | ||||
| -rw-r--r-- | fs/xfs/xfs_buf.c | 1 | ||||
| -rw-r--r-- | fs/xfs/xfs_inode.c | 23 | ||||
| -rw-r--r-- | fs/xfs/xfs_iomap.c | 2 | ||||
| -rw-r--r-- | fs/xfs/xfs_mount.h | 1 | ||||
| -rw-r--r-- | fs/xfs/xfs_qm.c | 3 |
13 files changed, 220 insertions, 63 deletions
diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c index d346d42c54d1..33db69be4832 100644 --- a/fs/xfs/libxfs/xfs_ag_resv.c +++ b/fs/xfs/libxfs/xfs_ag_resv.c | |||
| @@ -39,6 +39,7 @@ | |||
| 39 | #include "xfs_rmap_btree.h" | 39 | #include "xfs_rmap_btree.h" |
| 40 | #include "xfs_btree.h" | 40 | #include "xfs_btree.h" |
| 41 | #include "xfs_refcount_btree.h" | 41 | #include "xfs_refcount_btree.h" |
| 42 | #include "xfs_ialloc_btree.h" | ||
| 42 | 43 | ||
| 43 | /* | 44 | /* |
| 44 | * Per-AG Block Reservations | 45 | * Per-AG Block Reservations |
| @@ -200,22 +201,30 @@ __xfs_ag_resv_init( | |||
| 200 | struct xfs_mount *mp = pag->pag_mount; | 201 | struct xfs_mount *mp = pag->pag_mount; |
| 201 | struct xfs_ag_resv *resv; | 202 | struct xfs_ag_resv *resv; |
| 202 | int error; | 203 | int error; |
| 204 | xfs_extlen_t reserved; | ||
| 203 | 205 | ||
| 204 | resv = xfs_perag_resv(pag, type); | ||
| 205 | if (used > ask) | 206 | if (used > ask) |
| 206 | ask = used; | 207 | ask = used; |
| 207 | resv->ar_asked = ask; | 208 | reserved = ask - used; |
| 208 | resv->ar_reserved = resv->ar_orig_reserved = ask - used; | ||
| 209 | mp->m_ag_max_usable -= ask; | ||
| 210 | 209 | ||
| 211 | trace_xfs_ag_resv_init(pag, type, ask); | 210 | error = xfs_mod_fdblocks(mp, -(int64_t)reserved, true); |
| 212 | 211 | if (error) { | |
| 213 | error = xfs_mod_fdblocks(mp, -(int64_t)resv->ar_reserved, true); | ||
| 214 | if (error) | ||
| 215 | trace_xfs_ag_resv_init_error(pag->pag_mount, pag->pag_agno, | 212 | trace_xfs_ag_resv_init_error(pag->pag_mount, pag->pag_agno, |
| 216 | error, _RET_IP_); | 213 | error, _RET_IP_); |
| 214 | xfs_warn(mp, | ||
| 215 | "Per-AG reservation for AG %u failed. Filesystem may run out of space.", | ||
| 216 | pag->pag_agno); | ||
| 217 | return error; | ||
| 218 | } | ||
| 217 | 219 | ||
| 218 | return error; | 220 | mp->m_ag_max_usable -= ask; |
| 221 | |||
| 222 | resv = xfs_perag_resv(pag, type); | ||
| 223 | resv->ar_asked = ask; | ||
| 224 | resv->ar_reserved = resv->ar_orig_reserved = reserved; | ||
| 225 | |||
| 226 | trace_xfs_ag_resv_init(pag, type, ask); | ||
| 227 | return 0; | ||
| 219 | } | 228 | } |
| 220 | 229 | ||
| 221 | /* Create a per-AG block reservation. */ | 230 | /* Create a per-AG block reservation. */ |
| @@ -223,6 +232,8 @@ int | |||
| 223 | xfs_ag_resv_init( | 232 | xfs_ag_resv_init( |
| 224 | struct xfs_perag *pag) | 233 | struct xfs_perag *pag) |
| 225 | { | 234 | { |
| 235 | struct xfs_mount *mp = pag->pag_mount; | ||
| 236 | xfs_agnumber_t agno = pag->pag_agno; | ||
| 226 | xfs_extlen_t ask; | 237 | xfs_extlen_t ask; |
| 227 | xfs_extlen_t used; | 238 | xfs_extlen_t used; |
| 228 | int error = 0; | 239 | int error = 0; |
| @@ -231,23 +242,45 @@ xfs_ag_resv_init( | |||
| 231 | if (pag->pag_meta_resv.ar_asked == 0) { | 242 | if (pag->pag_meta_resv.ar_asked == 0) { |
| 232 | ask = used = 0; | 243 | ask = used = 0; |
| 233 | 244 | ||
| 234 | error = xfs_refcountbt_calc_reserves(pag->pag_mount, | 245 | error = xfs_refcountbt_calc_reserves(mp, agno, &ask, &used); |
| 235 | pag->pag_agno, &ask, &used); | ||
| 236 | if (error) | 246 | if (error) |
| 237 | goto out; | 247 | goto out; |
| 238 | 248 | ||
| 239 | error = __xfs_ag_resv_init(pag, XFS_AG_RESV_METADATA, | 249 | error = xfs_finobt_calc_reserves(mp, agno, &ask, &used); |
| 240 | ask, used); | ||
| 241 | if (error) | 250 | if (error) |
| 242 | goto out; | 251 | goto out; |
| 252 | |||
| 253 | error = __xfs_ag_resv_init(pag, XFS_AG_RESV_METADATA, | ||
| 254 | ask, used); | ||
| 255 | if (error) { | ||
| 256 | /* | ||
| 257 | * Because we didn't have per-AG reservations when the | ||
| 258 | * finobt feature was added we might not be able to | ||
| 259 | * reserve all needed blocks. Warn and fall back to the | ||
| 260 | * old and potentially buggy code in that case, but | ||
| 261 | * ensure we do have the reservation for the refcountbt. | ||
| 262 | */ | ||
| 263 | ask = used = 0; | ||
| 264 | |||
| 265 | mp->m_inotbt_nores = true; | ||
| 266 | |||
| 267 | error = xfs_refcountbt_calc_reserves(mp, agno, &ask, | ||
| 268 | &used); | ||
| 269 | if (error) | ||
| 270 | goto out; | ||
| 271 | |||
| 272 | error = __xfs_ag_resv_init(pag, XFS_AG_RESV_METADATA, | ||
| 273 | ask, used); | ||
| 274 | if (error) | ||
| 275 | goto out; | ||
| 276 | } | ||
| 243 | } | 277 | } |
| 244 | 278 | ||
| 245 | /* Create the AGFL metadata reservation */ | 279 | /* Create the AGFL metadata reservation */ |
| 246 | if (pag->pag_agfl_resv.ar_asked == 0) { | 280 | if (pag->pag_agfl_resv.ar_asked == 0) { |
| 247 | ask = used = 0; | 281 | ask = used = 0; |
| 248 | 282 | ||
| 249 | error = xfs_rmapbt_calc_reserves(pag->pag_mount, pag->pag_agno, | 283 | error = xfs_rmapbt_calc_reserves(mp, agno, &ask, &used); |
| 250 | &ask, &used); | ||
| 251 | if (error) | 284 | if (error) |
| 252 | goto out; | 285 | goto out; |
| 253 | 286 | ||
| @@ -256,9 +289,16 @@ xfs_ag_resv_init( | |||
| 256 | goto out; | 289 | goto out; |
| 257 | } | 290 | } |
| 258 | 291 | ||
| 292 | #ifdef DEBUG | ||
| 293 | /* need to read in the AGF for the ASSERT below to work */ | ||
| 294 | error = xfs_alloc_pagf_init(pag->pag_mount, NULL, pag->pag_agno, 0); | ||
| 295 | if (error) | ||
| 296 | return error; | ||
| 297 | |||
| 259 | ASSERT(xfs_perag_resv(pag, XFS_AG_RESV_METADATA)->ar_reserved + | 298 | ASSERT(xfs_perag_resv(pag, XFS_AG_RESV_METADATA)->ar_reserved + |
| 260 | xfs_perag_resv(pag, XFS_AG_RESV_AGFL)->ar_reserved <= | 299 | xfs_perag_resv(pag, XFS_AG_RESV_AGFL)->ar_reserved <= |
| 261 | pag->pagf_freeblks + pag->pagf_flcount); | 300 | pag->pagf_freeblks + pag->pagf_flcount); |
| 301 | #endif | ||
| 262 | out: | 302 | out: |
| 263 | return error; | 303 | return error; |
| 264 | } | 304 | } |
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index af1ecb19121e..6622d46ddec3 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c | |||
| @@ -131,9 +131,6 @@ xfs_attr_get( | |||
| 131 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) | 131 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) |
| 132 | return -EIO; | 132 | return -EIO; |
| 133 | 133 | ||
| 134 | if (!xfs_inode_hasattr(ip)) | ||
| 135 | return -ENOATTR; | ||
| 136 | |||
| 137 | error = xfs_attr_args_init(&args, ip, name, flags); | 134 | error = xfs_attr_args_init(&args, ip, name, flags); |
| 138 | if (error) | 135 | if (error) |
| 139 | return error; | 136 | return error; |
| @@ -392,9 +389,6 @@ xfs_attr_remove( | |||
| 392 | if (XFS_FORCED_SHUTDOWN(dp->i_mount)) | 389 | if (XFS_FORCED_SHUTDOWN(dp->i_mount)) |
| 393 | return -EIO; | 390 | return -EIO; |
| 394 | 391 | ||
| 395 | if (!xfs_inode_hasattr(dp)) | ||
| 396 | return -ENOATTR; | ||
| 397 | |||
| 398 | error = xfs_attr_args_init(&args, dp, name, flags); | 392 | error = xfs_attr_args_init(&args, dp, name, flags); |
| 399 | if (error) | 393 | if (error) |
| 400 | return error; | 394 | return error; |
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 44773c9eb957..bfc00de5c6f1 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c | |||
| @@ -3629,7 +3629,7 @@ xfs_bmap_btalloc( | |||
| 3629 | align = xfs_get_cowextsz_hint(ap->ip); | 3629 | align = xfs_get_cowextsz_hint(ap->ip); |
| 3630 | else if (xfs_alloc_is_userdata(ap->datatype)) | 3630 | else if (xfs_alloc_is_userdata(ap->datatype)) |
| 3631 | align = xfs_get_extsz_hint(ap->ip); | 3631 | align = xfs_get_extsz_hint(ap->ip); |
| 3632 | if (unlikely(align)) { | 3632 | if (align) { |
| 3633 | error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, | 3633 | error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, |
| 3634 | align, 0, ap->eof, 0, ap->conv, | 3634 | align, 0, ap->eof, 0, ap->conv, |
| 3635 | &ap->offset, &ap->length); | 3635 | &ap->offset, &ap->length); |
| @@ -3701,7 +3701,7 @@ xfs_bmap_btalloc( | |||
| 3701 | args.minlen = ap->minlen; | 3701 | args.minlen = ap->minlen; |
| 3702 | } | 3702 | } |
| 3703 | /* apply extent size hints if obtained earlier */ | 3703 | /* apply extent size hints if obtained earlier */ |
| 3704 | if (unlikely(align)) { | 3704 | if (align) { |
| 3705 | args.prod = align; | 3705 | args.prod = align; |
| 3706 | if ((args.mod = (xfs_extlen_t)do_mod(ap->offset, args.prod))) | 3706 | if ((args.mod = (xfs_extlen_t)do_mod(ap->offset, args.prod))) |
| 3707 | args.mod = (xfs_extlen_t)(args.prod - args.mod); | 3707 | args.mod = (xfs_extlen_t)(args.prod - args.mod); |
| @@ -4514,8 +4514,6 @@ xfs_bmapi_write( | |||
| 4514 | int n; /* current extent index */ | 4514 | int n; /* current extent index */ |
| 4515 | xfs_fileoff_t obno; /* old block number (offset) */ | 4515 | xfs_fileoff_t obno; /* old block number (offset) */ |
| 4516 | int whichfork; /* data or attr fork */ | 4516 | int whichfork; /* data or attr fork */ |
| 4517 | char inhole; /* current location is hole in file */ | ||
| 4518 | char wasdelay; /* old extent was delayed */ | ||
| 4519 | 4517 | ||
| 4520 | #ifdef DEBUG | 4518 | #ifdef DEBUG |
| 4521 | xfs_fileoff_t orig_bno; /* original block number value */ | 4519 | xfs_fileoff_t orig_bno; /* original block number value */ |
| @@ -4603,22 +4601,44 @@ xfs_bmapi_write( | |||
| 4603 | bma.firstblock = firstblock; | 4601 | bma.firstblock = firstblock; |
| 4604 | 4602 | ||
| 4605 | while (bno < end && n < *nmap) { | 4603 | while (bno < end && n < *nmap) { |
| 4606 | inhole = eof || bma.got.br_startoff > bno; | 4604 | bool need_alloc = false, wasdelay = false; |
| 4607 | wasdelay = !inhole && isnullstartblock(bma.got.br_startblock); | ||
| 4608 | 4605 | ||
| 4609 | /* | 4606 | /* in hole or beyoned EOF? */ |
| 4610 | * Make sure we only reflink into a hole. | 4607 | if (eof || bma.got.br_startoff > bno) { |
| 4611 | */ | 4608 | if (flags & XFS_BMAPI_DELALLOC) { |
| 4612 | if (flags & XFS_BMAPI_REMAP) | 4609 | /* |
| 4613 | ASSERT(inhole); | 4610 | * For the COW fork we can reasonably get a |
| 4614 | if (flags & XFS_BMAPI_COWFORK) | 4611 | * request for converting an extent that races |
| 4615 | ASSERT(!inhole); | 4612 | * with other threads already having converted |
| 4613 | * part of it, as there converting COW to | ||
| 4614 | * regular blocks is not protected using the | ||
| 4615 | * IOLOCK. | ||
| 4616 | */ | ||
| 4617 | ASSERT(flags & XFS_BMAPI_COWFORK); | ||
| 4618 | if (!(flags & XFS_BMAPI_COWFORK)) { | ||
| 4619 | error = -EIO; | ||
| 4620 | goto error0; | ||
| 4621 | } | ||
| 4622 | |||
| 4623 | if (eof || bno >= end) | ||
| 4624 | break; | ||
| 4625 | } else { | ||
| 4626 | need_alloc = true; | ||
| 4627 | } | ||
| 4628 | } else { | ||
| 4629 | /* | ||
| 4630 | * Make sure we only reflink into a hole. | ||
| 4631 | */ | ||
| 4632 | ASSERT(!(flags & XFS_BMAPI_REMAP)); | ||
| 4633 | if (isnullstartblock(bma.got.br_startblock)) | ||
| 4634 | wasdelay = true; | ||
| 4635 | } | ||
| 4616 | 4636 | ||
| 4617 | /* | 4637 | /* |
| 4618 | * First, deal with the hole before the allocated space | 4638 | * First, deal with the hole before the allocated space |
| 4619 | * that we found, if any. | 4639 | * that we found, if any. |
| 4620 | */ | 4640 | */ |
| 4621 | if (inhole || wasdelay) { | 4641 | if (need_alloc || wasdelay) { |
| 4622 | bma.eof = eof; | 4642 | bma.eof = eof; |
| 4623 | bma.conv = !!(flags & XFS_BMAPI_CONVERT); | 4643 | bma.conv = !!(flags & XFS_BMAPI_CONVERT); |
| 4624 | bma.wasdel = wasdelay; | 4644 | bma.wasdel = wasdelay; |
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index cecd094404cc..cdef87db5262 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h | |||
| @@ -110,6 +110,9 @@ struct xfs_extent_free_item | |||
| 110 | /* Map something in the CoW fork. */ | 110 | /* Map something in the CoW fork. */ |
| 111 | #define XFS_BMAPI_COWFORK 0x200 | 111 | #define XFS_BMAPI_COWFORK 0x200 |
| 112 | 112 | ||
| 113 | /* Only convert delalloc space, don't allocate entirely new extents */ | ||
| 114 | #define XFS_BMAPI_DELALLOC 0x400 | ||
| 115 | |||
| 113 | #define XFS_BMAPI_FLAGS \ | 116 | #define XFS_BMAPI_FLAGS \ |
| 114 | { XFS_BMAPI_ENTIRE, "ENTIRE" }, \ | 117 | { XFS_BMAPI_ENTIRE, "ENTIRE" }, \ |
| 115 | { XFS_BMAPI_METADATA, "METADATA" }, \ | 118 | { XFS_BMAPI_METADATA, "METADATA" }, \ |
| @@ -120,7 +123,8 @@ struct xfs_extent_free_item | |||
| 120 | { XFS_BMAPI_CONVERT, "CONVERT" }, \ | 123 | { XFS_BMAPI_CONVERT, "CONVERT" }, \ |
| 121 | { XFS_BMAPI_ZERO, "ZERO" }, \ | 124 | { XFS_BMAPI_ZERO, "ZERO" }, \ |
| 122 | { XFS_BMAPI_REMAP, "REMAP" }, \ | 125 | { XFS_BMAPI_REMAP, "REMAP" }, \ |
| 123 | { XFS_BMAPI_COWFORK, "COWFORK" } | 126 | { XFS_BMAPI_COWFORK, "COWFORK" }, \ |
| 127 | { XFS_BMAPI_DELALLOC, "DELALLOC" } | ||
| 124 | 128 | ||
| 125 | 129 | ||
| 126 | static inline int xfs_bmapi_aflag(int w) | 130 | static inline int xfs_bmapi_aflag(int w) |
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c index 0fd086d03d41..7c471881c9a6 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.c +++ b/fs/xfs/libxfs/xfs_ialloc_btree.c | |||
| @@ -82,11 +82,12 @@ xfs_finobt_set_root( | |||
| 82 | } | 82 | } |
| 83 | 83 | ||
| 84 | STATIC int | 84 | STATIC int |
| 85 | xfs_inobt_alloc_block( | 85 | __xfs_inobt_alloc_block( |
| 86 | struct xfs_btree_cur *cur, | 86 | struct xfs_btree_cur *cur, |
| 87 | union xfs_btree_ptr *start, | 87 | union xfs_btree_ptr *start, |
| 88 | union xfs_btree_ptr *new, | 88 | union xfs_btree_ptr *new, |
| 89 | int *stat) | 89 | int *stat, |
| 90 | enum xfs_ag_resv_type resv) | ||
| 90 | { | 91 | { |
| 91 | xfs_alloc_arg_t args; /* block allocation args */ | 92 | xfs_alloc_arg_t args; /* block allocation args */ |
| 92 | int error; /* error return value */ | 93 | int error; /* error return value */ |
| @@ -103,6 +104,7 @@ xfs_inobt_alloc_block( | |||
| 103 | args.maxlen = 1; | 104 | args.maxlen = 1; |
| 104 | args.prod = 1; | 105 | args.prod = 1; |
| 105 | args.type = XFS_ALLOCTYPE_NEAR_BNO; | 106 | args.type = XFS_ALLOCTYPE_NEAR_BNO; |
| 107 | args.resv = resv; | ||
| 106 | 108 | ||
| 107 | error = xfs_alloc_vextent(&args); | 109 | error = xfs_alloc_vextent(&args); |
| 108 | if (error) { | 110 | if (error) { |
| @@ -123,6 +125,27 @@ xfs_inobt_alloc_block( | |||
| 123 | } | 125 | } |
| 124 | 126 | ||
| 125 | STATIC int | 127 | STATIC int |
| 128 | xfs_inobt_alloc_block( | ||
| 129 | struct xfs_btree_cur *cur, | ||
| 130 | union xfs_btree_ptr *start, | ||
| 131 | union xfs_btree_ptr *new, | ||
| 132 | int *stat) | ||
| 133 | { | ||
| 134 | return __xfs_inobt_alloc_block(cur, start, new, stat, XFS_AG_RESV_NONE); | ||
| 135 | } | ||
| 136 | |||
| 137 | STATIC int | ||
| 138 | xfs_finobt_alloc_block( | ||
| 139 | struct xfs_btree_cur *cur, | ||
| 140 | union xfs_btree_ptr *start, | ||
| 141 | union xfs_btree_ptr *new, | ||
| 142 | int *stat) | ||
| 143 | { | ||
| 144 | return __xfs_inobt_alloc_block(cur, start, new, stat, | ||
| 145 | XFS_AG_RESV_METADATA); | ||
| 146 | } | ||
| 147 | |||
| 148 | STATIC int | ||
| 126 | xfs_inobt_free_block( | 149 | xfs_inobt_free_block( |
| 127 | struct xfs_btree_cur *cur, | 150 | struct xfs_btree_cur *cur, |
| 128 | struct xfs_buf *bp) | 151 | struct xfs_buf *bp) |
| @@ -328,7 +351,7 @@ static const struct xfs_btree_ops xfs_finobt_ops = { | |||
| 328 | 351 | ||
| 329 | .dup_cursor = xfs_inobt_dup_cursor, | 352 | .dup_cursor = xfs_inobt_dup_cursor, |
| 330 | .set_root = xfs_finobt_set_root, | 353 | .set_root = xfs_finobt_set_root, |
| 331 | .alloc_block = xfs_inobt_alloc_block, | 354 | .alloc_block = xfs_finobt_alloc_block, |
| 332 | .free_block = xfs_inobt_free_block, | 355 | .free_block = xfs_inobt_free_block, |
| 333 | .get_minrecs = xfs_inobt_get_minrecs, | 356 | .get_minrecs = xfs_inobt_get_minrecs, |
| 334 | .get_maxrecs = xfs_inobt_get_maxrecs, | 357 | .get_maxrecs = xfs_inobt_get_maxrecs, |
| @@ -480,3 +503,64 @@ xfs_inobt_rec_check_count( | |||
| 480 | return 0; | 503 | return 0; |
| 481 | } | 504 | } |
| 482 | #endif /* DEBUG */ | 505 | #endif /* DEBUG */ |
| 506 | |||
| 507 | static xfs_extlen_t | ||
| 508 | xfs_inobt_max_size( | ||
| 509 | struct xfs_mount *mp) | ||
| 510 | { | ||
| 511 | /* Bail out if we're uninitialized, which can happen in mkfs. */ | ||
| 512 | if (mp->m_inobt_mxr[0] == 0) | ||
| 513 | return 0; | ||
| 514 | |||
| 515 | return xfs_btree_calc_size(mp, mp->m_inobt_mnr, | ||
| 516 | (uint64_t)mp->m_sb.sb_agblocks * mp->m_sb.sb_inopblock / | ||
| 517 | XFS_INODES_PER_CHUNK); | ||
| 518 | } | ||
| 519 | |||
| 520 | static int | ||
| 521 | xfs_inobt_count_blocks( | ||
| 522 | struct xfs_mount *mp, | ||
| 523 | xfs_agnumber_t agno, | ||
| 524 | xfs_btnum_t btnum, | ||
| 525 | xfs_extlen_t *tree_blocks) | ||
| 526 | { | ||
| 527 | struct xfs_buf *agbp; | ||
| 528 | struct xfs_btree_cur *cur; | ||
| 529 | int error; | ||
| 530 | |||
| 531 | error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp); | ||
| 532 | if (error) | ||
| 533 | return error; | ||
| 534 | |||
| 535 | cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno, btnum); | ||
| 536 | error = xfs_btree_count_blocks(cur, tree_blocks); | ||
| 537 | xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); | ||
| 538 | xfs_buf_relse(agbp); | ||
| 539 | |||
| 540 | return error; | ||
| 541 | } | ||
| 542 | |||
| 543 | /* | ||
| 544 | * Figure out how many blocks to reserve and how many are used by this btree. | ||
| 545 | */ | ||
| 546 | int | ||
| 547 | xfs_finobt_calc_reserves( | ||
| 548 | struct xfs_mount *mp, | ||
| 549 | xfs_agnumber_t agno, | ||
| 550 | xfs_extlen_t *ask, | ||
| 551 | xfs_extlen_t *used) | ||
| 552 | { | ||
| 553 | xfs_extlen_t tree_len = 0; | ||
| 554 | int error; | ||
| 555 | |||
| 556 | if (!xfs_sb_version_hasfinobt(&mp->m_sb)) | ||
| 557 | return 0; | ||
| 558 | |||
| 559 | error = xfs_inobt_count_blocks(mp, agno, XFS_BTNUM_FINO, &tree_len); | ||
| 560 | if (error) | ||
| 561 | return error; | ||
| 562 | |||
| 563 | *ask += xfs_inobt_max_size(mp); | ||
| 564 | *used += tree_len; | ||
| 565 | return 0; | ||
| 566 | } | ||
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.h b/fs/xfs/libxfs/xfs_ialloc_btree.h index bd88453217ce..aa81e2e63f3f 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.h +++ b/fs/xfs/libxfs/xfs_ialloc_btree.h | |||
| @@ -72,4 +72,7 @@ int xfs_inobt_rec_check_count(struct xfs_mount *, | |||
| 72 | #define xfs_inobt_rec_check_count(mp, rec) 0 | 72 | #define xfs_inobt_rec_check_count(mp, rec) 0 |
| 73 | #endif /* DEBUG */ | 73 | #endif /* DEBUG */ |
| 74 | 74 | ||
| 75 | int xfs_finobt_calc_reserves(struct xfs_mount *mp, xfs_agnumber_t agno, | ||
| 76 | xfs_extlen_t *ask, xfs_extlen_t *used); | ||
| 77 | |||
| 75 | #endif /* __XFS_IALLOC_BTREE_H__ */ | 78 | #endif /* __XFS_IALLOC_BTREE_H__ */ |
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 2580262e4ea0..584ec896a533 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c | |||
| @@ -242,7 +242,7 @@ xfs_mount_validate_sb( | |||
| 242 | sbp->sb_blocklog < XFS_MIN_BLOCKSIZE_LOG || | 242 | sbp->sb_blocklog < XFS_MIN_BLOCKSIZE_LOG || |
| 243 | sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG || | 243 | sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG || |
| 244 | sbp->sb_blocksize != (1 << sbp->sb_blocklog) || | 244 | sbp->sb_blocksize != (1 << sbp->sb_blocklog) || |
| 245 | sbp->sb_dirblklog > XFS_MAX_BLOCKSIZE_LOG || | 245 | sbp->sb_dirblklog + sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG || |
| 246 | sbp->sb_inodesize < XFS_DINODE_MIN_SIZE || | 246 | sbp->sb_inodesize < XFS_DINODE_MIN_SIZE || |
| 247 | sbp->sb_inodesize > XFS_DINODE_MAX_SIZE || | 247 | sbp->sb_inodesize > XFS_DINODE_MAX_SIZE || |
| 248 | sbp->sb_inodelog < XFS_DINODE_MIN_LOG || | 248 | sbp->sb_inodelog < XFS_DINODE_MIN_LOG || |
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index b9abce524c33..c1417919ab0a 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c | |||
| @@ -528,7 +528,6 @@ xfs_getbmap( | |||
| 528 | xfs_bmbt_irec_t *map; /* buffer for user's data */ | 528 | xfs_bmbt_irec_t *map; /* buffer for user's data */ |
| 529 | xfs_mount_t *mp; /* file system mount point */ | 529 | xfs_mount_t *mp; /* file system mount point */ |
| 530 | int nex; /* # of user extents can do */ | 530 | int nex; /* # of user extents can do */ |
| 531 | int nexleft; /* # of user extents left */ | ||
| 532 | int subnex; /* # of bmapi's can do */ | 531 | int subnex; /* # of bmapi's can do */ |
| 533 | int nmap; /* number of map entries */ | 532 | int nmap; /* number of map entries */ |
| 534 | struct getbmapx *out; /* output structure */ | 533 | struct getbmapx *out; /* output structure */ |
| @@ -686,10 +685,8 @@ xfs_getbmap( | |||
| 686 | goto out_free_map; | 685 | goto out_free_map; |
| 687 | } | 686 | } |
| 688 | 687 | ||
| 689 | nexleft = nex; | ||
| 690 | |||
| 691 | do { | 688 | do { |
| 692 | nmap = (nexleft > subnex) ? subnex : nexleft; | 689 | nmap = (nex> subnex) ? subnex : nex; |
| 693 | error = xfs_bmapi_read(ip, XFS_BB_TO_FSBT(mp, bmv->bmv_offset), | 690 | error = xfs_bmapi_read(ip, XFS_BB_TO_FSBT(mp, bmv->bmv_offset), |
| 694 | XFS_BB_TO_FSB(mp, bmv->bmv_length), | 691 | XFS_BB_TO_FSB(mp, bmv->bmv_length), |
| 695 | map, &nmap, bmapi_flags); | 692 | map, &nmap, bmapi_flags); |
| @@ -697,8 +694,8 @@ xfs_getbmap( | |||
| 697 | goto out_free_map; | 694 | goto out_free_map; |
| 698 | ASSERT(nmap <= subnex); | 695 | ASSERT(nmap <= subnex); |
| 699 | 696 | ||
| 700 | for (i = 0; i < nmap && nexleft && bmv->bmv_length && | 697 | for (i = 0; i < nmap && bmv->bmv_length && |
| 701 | cur_ext < bmv->bmv_count; i++) { | 698 | cur_ext < bmv->bmv_count - 1; i++) { |
| 702 | out[cur_ext].bmv_oflags = 0; | 699 | out[cur_ext].bmv_oflags = 0; |
| 703 | if (map[i].br_state == XFS_EXT_UNWRITTEN) | 700 | if (map[i].br_state == XFS_EXT_UNWRITTEN) |
| 704 | out[cur_ext].bmv_oflags |= BMV_OF_PREALLOC; | 701 | out[cur_ext].bmv_oflags |= BMV_OF_PREALLOC; |
| @@ -760,16 +757,27 @@ xfs_getbmap( | |||
| 760 | continue; | 757 | continue; |
| 761 | } | 758 | } |
| 762 | 759 | ||
| 760 | /* | ||
| 761 | * In order to report shared extents accurately, | ||
| 762 | * we report each distinct shared/unshared part | ||
| 763 | * of a single bmbt record using multiple bmap | ||
| 764 | * extents. To make that happen, we iterate the | ||
| 765 | * same map array item multiple times, each | ||
| 766 | * time trimming out the subextent that we just | ||
| 767 | * reported. | ||
| 768 | * | ||
| 769 | * Because of this, we must check the out array | ||
| 770 | * index (cur_ext) directly against bmv_count-1 | ||
| 771 | * to avoid overflows. | ||
| 772 | */ | ||
| 763 | if (inject_map.br_startblock != NULLFSBLOCK) { | 773 | if (inject_map.br_startblock != NULLFSBLOCK) { |
| 764 | map[i] = inject_map; | 774 | map[i] = inject_map; |
| 765 | i--; | 775 | i--; |
| 766 | } else | 776 | } |
| 767 | nexleft--; | ||
| 768 | bmv->bmv_entries++; | 777 | bmv->bmv_entries++; |
| 769 | cur_ext++; | 778 | cur_ext++; |
| 770 | } | 779 | } |
| 771 | } while (nmap && nexleft && bmv->bmv_length && | 780 | } while (nmap && bmv->bmv_length && cur_ext < bmv->bmv_count - 1); |
| 772 | cur_ext < bmv->bmv_count); | ||
| 773 | 781 | ||
| 774 | out_free_map: | 782 | out_free_map: |
| 775 | kmem_free(map); | 783 | kmem_free(map); |
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 7f0a01f7b592..ac3b4db519df 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c | |||
| @@ -422,6 +422,7 @@ retry: | |||
| 422 | out_free_pages: | 422 | out_free_pages: |
| 423 | for (i = 0; i < bp->b_page_count; i++) | 423 | for (i = 0; i < bp->b_page_count; i++) |
| 424 | __free_page(bp->b_pages[i]); | 424 | __free_page(bp->b_pages[i]); |
| 425 | bp->b_flags &= ~_XBF_PAGES; | ||
| 425 | return error; | 426 | return error; |
| 426 | } | 427 | } |
| 427 | 428 | ||
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index b9557795eb74..de32f0fe47c8 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c | |||
| @@ -1792,22 +1792,23 @@ xfs_inactive_ifree( | |||
| 1792 | int error; | 1792 | int error; |
| 1793 | 1793 | ||
| 1794 | /* | 1794 | /* |
| 1795 | * The ifree transaction might need to allocate blocks for record | 1795 | * We try to use a per-AG reservation for any block needed by the finobt |
| 1796 | * insertion to the finobt. We don't want to fail here at ENOSPC, so | 1796 | * tree, but as the finobt feature predates the per-AG reservation |
| 1797 | * allow ifree to dip into the reserved block pool if necessary. | 1797 | * support a degraded file system might not have enough space for the |
| 1798 | * | 1798 | * reservation at mount time. In that case try to dip into the reserved |
| 1799 | * Freeing large sets of inodes generally means freeing inode chunks, | 1799 | * pool and pray. |
| 1800 | * directory and file data blocks, so this should be relatively safe. | ||
| 1801 | * Only under severe circumstances should it be possible to free enough | ||
| 1802 | * inodes to exhaust the reserve block pool via finobt expansion while | ||
| 1803 | * at the same time not creating free space in the filesystem. | ||
| 1804 | * | 1800 | * |
| 1805 | * Send a warning if the reservation does happen to fail, as the inode | 1801 | * Send a warning if the reservation does happen to fail, as the inode |
| 1806 | * now remains allocated and sits on the unlinked list until the fs is | 1802 | * now remains allocated and sits on the unlinked list until the fs is |
| 1807 | * repaired. | 1803 | * repaired. |
| 1808 | */ | 1804 | */ |
| 1809 | error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ifree, | 1805 | if (unlikely(mp->m_inotbt_nores)) { |
| 1810 | XFS_IFREE_SPACE_RES(mp), 0, XFS_TRANS_RESERVE, &tp); | 1806 | error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ifree, |
| 1807 | XFS_IFREE_SPACE_RES(mp), 0, XFS_TRANS_RESERVE, | ||
| 1808 | &tp); | ||
| 1809 | } else { | ||
| 1810 | error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ifree, 0, 0, 0, &tp); | ||
| 1811 | } | ||
| 1811 | if (error) { | 1812 | if (error) { |
| 1812 | if (error == -ENOSPC) { | 1813 | if (error == -ENOSPC) { |
| 1813 | xfs_warn_ratelimited(mp, | 1814 | xfs_warn_ratelimited(mp, |
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 0d147428971e..1aa3abd67b36 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c | |||
| @@ -681,7 +681,7 @@ xfs_iomap_write_allocate( | |||
| 681 | xfs_trans_t *tp; | 681 | xfs_trans_t *tp; |
| 682 | int nimaps; | 682 | int nimaps; |
| 683 | int error = 0; | 683 | int error = 0; |
| 684 | int flags = 0; | 684 | int flags = XFS_BMAPI_DELALLOC; |
| 685 | int nres; | 685 | int nres; |
| 686 | 686 | ||
| 687 | if (whichfork == XFS_COW_FORK) | 687 | if (whichfork == XFS_COW_FORK) |
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 84f785218907..7f351f706b7a 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h | |||
| @@ -140,6 +140,7 @@ typedef struct xfs_mount { | |||
| 140 | int m_fixedfsid[2]; /* unchanged for life of FS */ | 140 | int m_fixedfsid[2]; /* unchanged for life of FS */ |
| 141 | uint m_dmevmask; /* DMI events for this FS */ | 141 | uint m_dmevmask; /* DMI events for this FS */ |
| 142 | __uint64_t m_flags; /* global mount flags */ | 142 | __uint64_t m_flags; /* global mount flags */ |
| 143 | bool m_inotbt_nores; /* no per-AG finobt resv. */ | ||
| 143 | int m_ialloc_inos; /* inodes in inode allocation */ | 144 | int m_ialloc_inos; /* inodes in inode allocation */ |
| 144 | int m_ialloc_blks; /* blocks in inode allocation */ | 145 | int m_ialloc_blks; /* blocks in inode allocation */ |
| 145 | int m_ialloc_min_blks;/* min blocks in sparse inode | 146 | int m_ialloc_min_blks;/* min blocks in sparse inode |
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 45e50ea90769..b669b123287b 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c | |||
| @@ -1177,7 +1177,8 @@ xfs_qm_dqusage_adjust( | |||
| 1177 | * the case in all other instances. It's OK that we do this because | 1177 | * the case in all other instances. It's OK that we do this because |
| 1178 | * quotacheck is done only at mount time. | 1178 | * quotacheck is done only at mount time. |
| 1179 | */ | 1179 | */ |
| 1180 | error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip); | 1180 | error = xfs_iget(mp, NULL, ino, XFS_IGET_DONTCACHE, XFS_ILOCK_EXCL, |
| 1181 | &ip); | ||
| 1181 | if (error) { | 1182 | if (error) { |
| 1182 | *res = BULKSTAT_RV_NOTHING; | 1183 | *res = BULKSTAT_RV_NOTHING; |
| 1183 | return error; | 1184 | return error; |
