aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs
diff options
context:
space:
mode:
authorDave Chinner <david@fromorbit.com>2016-10-02 18:52:31 -0400
committerDave Chinner <david@fromorbit.com>2016-10-02 18:52:31 -0400
commit79ad57612495744d3875a6fba25c467a87b3ad64 (patch)
tree34532ea5817090e1b650da0c67135d169e7c5e16 /fs/xfs
parentb036b97050a9799aaaee78a8501aa255a74a2db7 (diff)
parent3fd129b63fd062a0d8f5d55994a6e98896c20fa7 (diff)
Merge branch 'xfs-4.9-reflink-prep' into for-next
Diffstat (limited to 'fs/xfs')
-rw-r--r--fs/xfs/Makefile1
-rw-r--r--fs/xfs/libxfs/xfs_ag_resv.c325
-rw-r--r--fs/xfs/libxfs/xfs_ag_resv.h35
-rw-r--r--fs/xfs/libxfs/xfs_alloc.c112
-rw-r--r--fs/xfs/libxfs/xfs_alloc.h8
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c6
-rw-r--r--fs/xfs/libxfs/xfs_btree.c59
-rw-r--r--fs/xfs/libxfs/xfs_btree.h28
-rw-r--r--fs/xfs/libxfs/xfs_defer.c79
-rw-r--r--fs/xfs/libxfs/xfs_ialloc_btree.c2
-rw-r--r--fs/xfs/libxfs/xfs_log_format.h10
-rw-r--r--fs/xfs/xfs_filestream.c4
-rw-r--r--fs/xfs/xfs_fsops.c2
-rw-r--r--fs/xfs/xfs_mount.h36
-rw-r--r--fs/xfs/xfs_rmap_item.c36
-rw-r--r--fs/xfs/xfs_rmap_item.h8
-rw-r--r--fs/xfs/xfs_super.c5
-rw-r--r--fs/xfs/xfs_trace.h75
-rw-r--r--fs/xfs/xfs_trans.c1
-rw-r--r--fs/xfs/xfs_trans_extfree.c3
20 files changed, 719 insertions, 116 deletions
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index fc593c869493..584e87e11cb6 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -52,6 +52,7 @@ xfs-y += $(addprefix libxfs/, \
52 xfs_inode_fork.o \ 52 xfs_inode_fork.o \
53 xfs_inode_buf.o \ 53 xfs_inode_buf.o \
54 xfs_log_rlimit.o \ 54 xfs_log_rlimit.o \
55 xfs_ag_resv.o \
55 xfs_rmap.o \ 56 xfs_rmap.o \
56 xfs_rmap_btree.o \ 57 xfs_rmap_btree.o \
57 xfs_sb.o \ 58 xfs_sb.o \
diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c
new file mode 100644
index 000000000000..e3ae0f2b4294
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_ag_resv.c
@@ -0,0 +1,325 @@
1/*
2 * Copyright (C) 2016 Oracle. All Rights Reserved.
3 *
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it would be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
19 */
20#include "xfs.h"
21#include "xfs_fs.h"
22#include "xfs_shared.h"
23#include "xfs_format.h"
24#include "xfs_log_format.h"
25#include "xfs_trans_resv.h"
26#include "xfs_sb.h"
27#include "xfs_mount.h"
28#include "xfs_defer.h"
29#include "xfs_alloc.h"
30#include "xfs_error.h"
31#include "xfs_trace.h"
32#include "xfs_cksum.h"
33#include "xfs_trans.h"
34#include "xfs_bit.h"
35#include "xfs_bmap.h"
36#include "xfs_bmap_btree.h"
37#include "xfs_ag_resv.h"
38#include "xfs_trans_space.h"
39#include "xfs_rmap_btree.h"
40#include "xfs_btree.h"
41
42/*
43 * Per-AG Block Reservations
44 *
45 * For some kinds of allocation group metadata structures, it is advantageous
46 * to reserve a small number of blocks in each AG so that future expansions of
47 * that data structure do not encounter ENOSPC because errors during a btree
48 * split cause the filesystem to go offline.
49 *
50 * Prior to the introduction of reflink, this wasn't an issue because the free
51 * space btrees maintain a reserve of space (the AGFL) to handle any expansion
52 * that may be necessary; and allocations of other metadata (inodes, BMBT,
53 * dir/attr) aren't restricted to a single AG. However, with reflink it is
54 * possible to allocate all the space in an AG, have subsequent reflink/CoW
55 * activity expand the refcount btree, and discover that there's no space left
56 * to handle that expansion. Since we can calculate the maximum size of the
57 * refcount btree, we can reserve space for it and avoid ENOSPC.
58 *
59 * Handling per-AG reservations consists of four changes to the allocator's
60 * behavior: First, because these reservations are always needed, we decrease
61 * the ag_max_usable counter to reflect the size of the AG after the reserved
62 * blocks are taken. Second, the reservations must be reflected in the
63 * fdblocks count to maintain proper accounting. Third, each AG must maintain
64 * its own reserved block counter so that we can calculate the amount of space
65 * that must remain free to maintain the reservations. Fourth, the "remaining
66 * reserved blocks" count must be used when calculating the length of the
67 * longest free extent in an AG and to clamp maxlen in the per-AG allocation
68 * functions. In other words, we maintain a virtual allocation via in-core
69 * accounting tricks so that we don't have to clean up after a crash. :)
70 *
71 * Reserved blocks can be managed by passing one of the enum xfs_ag_resv_type
72 * values via struct xfs_alloc_arg or directly to the xfs_free_extent
73 * function. It might seem a little funny to maintain a reservoir of blocks
74 * to feed another reservoir, but the AGFL only holds enough blocks to get
75 * through the next transaction. The per-AG reservation is to ensure (we
76 * hope) that each AG never runs out of blocks. Each data structure wanting
77 * to use the reservation system should update ask/used in xfs_ag_resv_init.
78 */
79
80/*
81 * Are we critically low on blocks? For now we'll define that as the number
82 * of blocks we can get our hands on being less than 10% of what we reserved
83 * or less than some arbitrary number (maximum btree height).
84 */
85bool
86xfs_ag_resv_critical(
87 struct xfs_perag *pag,
88 enum xfs_ag_resv_type type)
89{
90 xfs_extlen_t avail;
91 xfs_extlen_t orig;
92
93 switch (type) {
94 case XFS_AG_RESV_METADATA:
95 avail = pag->pagf_freeblks - pag->pag_agfl_resv.ar_reserved;
96 orig = pag->pag_meta_resv.ar_asked;
97 break;
98 case XFS_AG_RESV_AGFL:
99 avail = pag->pagf_freeblks + pag->pagf_flcount -
100 pag->pag_meta_resv.ar_reserved;
101 orig = pag->pag_agfl_resv.ar_asked;
102 break;
103 default:
104 ASSERT(0);
105 return false;
106 }
107
108 trace_xfs_ag_resv_critical(pag, type, avail);
109
110 /* Critically low if less than 10% or max btree height remains. */
111 return avail < orig / 10 || avail < XFS_BTREE_MAXLEVELS;
112}
113
114/*
115 * How many blocks are reserved but not used, and therefore must not be
116 * allocated away?
117 */
118xfs_extlen_t
119xfs_ag_resv_needed(
120 struct xfs_perag *pag,
121 enum xfs_ag_resv_type type)
122{
123 xfs_extlen_t len;
124
125 len = pag->pag_meta_resv.ar_reserved + pag->pag_agfl_resv.ar_reserved;
126 switch (type) {
127 case XFS_AG_RESV_METADATA:
128 case XFS_AG_RESV_AGFL:
129 len -= xfs_perag_resv(pag, type)->ar_reserved;
130 break;
131 case XFS_AG_RESV_NONE:
132 /* empty */
133 break;
134 default:
135 ASSERT(0);
136 }
137
138 trace_xfs_ag_resv_needed(pag, type, len);
139
140 return len;
141}
142
143/* Clean out a reservation */
144static int
145__xfs_ag_resv_free(
146 struct xfs_perag *pag,
147 enum xfs_ag_resv_type type)
148{
149 struct xfs_ag_resv *resv;
150 xfs_extlen_t oldresv;
151 int error;
152
153 trace_xfs_ag_resv_free(pag, type, 0);
154
155 resv = xfs_perag_resv(pag, type);
156 pag->pag_mount->m_ag_max_usable += resv->ar_asked;
157 /*
158 * AGFL blocks are always considered "free", so whatever
159 * was reserved at mount time must be given back at umount.
160 */
161 if (type == XFS_AG_RESV_AGFL)
162 oldresv = resv->ar_orig_reserved;
163 else
164 oldresv = resv->ar_reserved;
165 error = xfs_mod_fdblocks(pag->pag_mount, oldresv, true);
166 resv->ar_reserved = 0;
167 resv->ar_asked = 0;
168
169 if (error)
170 trace_xfs_ag_resv_free_error(pag->pag_mount, pag->pag_agno,
171 error, _RET_IP_);
172 return error;
173}
174
175/* Free a per-AG reservation. */
176int
177xfs_ag_resv_free(
178 struct xfs_perag *pag)
179{
180 int error;
181 int err2;
182
183 error = __xfs_ag_resv_free(pag, XFS_AG_RESV_AGFL);
184 err2 = __xfs_ag_resv_free(pag, XFS_AG_RESV_METADATA);
185 if (err2 && !error)
186 error = err2;
187 return error;
188}
189
190static int
191__xfs_ag_resv_init(
192 struct xfs_perag *pag,
193 enum xfs_ag_resv_type type,
194 xfs_extlen_t ask,
195 xfs_extlen_t used)
196{
197 struct xfs_mount *mp = pag->pag_mount;
198 struct xfs_ag_resv *resv;
199 int error;
200
201 resv = xfs_perag_resv(pag, type);
202 if (used > ask)
203 ask = used;
204 resv->ar_asked = ask;
205 resv->ar_reserved = resv->ar_orig_reserved = ask - used;
206 mp->m_ag_max_usable -= ask;
207
208 trace_xfs_ag_resv_init(pag, type, ask);
209
210 error = xfs_mod_fdblocks(mp, -(int64_t)resv->ar_reserved, true);
211 if (error)
212 trace_xfs_ag_resv_init_error(pag->pag_mount, pag->pag_agno,
213 error, _RET_IP_);
214
215 return error;
216}
217
218/* Create a per-AG block reservation. */
219int
220xfs_ag_resv_init(
221 struct xfs_perag *pag)
222{
223 xfs_extlen_t ask;
224 xfs_extlen_t used;
225 int error = 0;
226
227 /* Create the metadata reservation. */
228 if (pag->pag_meta_resv.ar_asked == 0) {
229 ask = used = 0;
230
231 error = __xfs_ag_resv_init(pag, XFS_AG_RESV_METADATA,
232 ask, used);
233 if (error)
234 goto out;
235 }
236
237 /* Create the AGFL metadata reservation */
238 if (pag->pag_agfl_resv.ar_asked == 0) {
239 ask = used = 0;
240
241 error = __xfs_ag_resv_init(pag, XFS_AG_RESV_AGFL, ask, used);
242 if (error)
243 goto out;
244 }
245
246out:
247 return error;
248}
249
250/* Allocate a block from the reservation. */
251void
252xfs_ag_resv_alloc_extent(
253 struct xfs_perag *pag,
254 enum xfs_ag_resv_type type,
255 struct xfs_alloc_arg *args)
256{
257 struct xfs_ag_resv *resv;
258 xfs_extlen_t len;
259 uint field;
260
261 trace_xfs_ag_resv_alloc_extent(pag, type, args->len);
262
263 switch (type) {
264 case XFS_AG_RESV_METADATA:
265 case XFS_AG_RESV_AGFL:
266 resv = xfs_perag_resv(pag, type);
267 break;
268 default:
269 ASSERT(0);
270 /* fall through */
271 case XFS_AG_RESV_NONE:
272 field = args->wasdel ? XFS_TRANS_SB_RES_FDBLOCKS :
273 XFS_TRANS_SB_FDBLOCKS;
274 xfs_trans_mod_sb(args->tp, field, -(int64_t)args->len);
275 return;
276 }
277
278 len = min_t(xfs_extlen_t, args->len, resv->ar_reserved);
279 resv->ar_reserved -= len;
280 if (type == XFS_AG_RESV_AGFL)
281 return;
282 /* Allocations of reserved blocks only need on-disk sb updates... */
283 xfs_trans_mod_sb(args->tp, XFS_TRANS_SB_RES_FDBLOCKS, -(int64_t)len);
284 /* ...but non-reserved blocks need in-core and on-disk updates. */
285 if (args->len > len)
286 xfs_trans_mod_sb(args->tp, XFS_TRANS_SB_FDBLOCKS,
287 -((int64_t)args->len - len));
288}
289
290/* Free a block to the reservation. */
291void
292xfs_ag_resv_free_extent(
293 struct xfs_perag *pag,
294 enum xfs_ag_resv_type type,
295 struct xfs_trans *tp,
296 xfs_extlen_t len)
297{
298 xfs_extlen_t leftover;
299 struct xfs_ag_resv *resv;
300
301 trace_xfs_ag_resv_free_extent(pag, type, len);
302
303 switch (type) {
304 case XFS_AG_RESV_METADATA:
305 case XFS_AG_RESV_AGFL:
306 resv = xfs_perag_resv(pag, type);
307 break;
308 default:
309 ASSERT(0);
310 /* fall through */
311 case XFS_AG_RESV_NONE:
312 xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (int64_t)len);
313 return;
314 }
315
316 leftover = min_t(xfs_extlen_t, len, resv->ar_asked - resv->ar_reserved);
317 resv->ar_reserved += leftover;
318 if (type == XFS_AG_RESV_AGFL)
319 return;
320 /* Freeing into the reserved pool only requires on-disk update... */
321 xfs_trans_mod_sb(tp, XFS_TRANS_SB_RES_FDBLOCKS, len);
322 /* ...but freeing beyond that requires in-core and on-disk update. */
323 if (len > leftover)
324 xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, len - leftover);
325}
diff --git a/fs/xfs/libxfs/xfs_ag_resv.h b/fs/xfs/libxfs/xfs_ag_resv.h
new file mode 100644
index 000000000000..8d6c687deef3
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_ag_resv.h
@@ -0,0 +1,35 @@
1/*
2 * Copyright (C) 2016 Oracle. All Rights Reserved.
3 *
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it would be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
19 */
20#ifndef __XFS_AG_RESV_H__
21#define __XFS_AG_RESV_H__
22
23int xfs_ag_resv_free(struct xfs_perag *pag);
24int xfs_ag_resv_init(struct xfs_perag *pag);
25
26bool xfs_ag_resv_critical(struct xfs_perag *pag, enum xfs_ag_resv_type type);
27xfs_extlen_t xfs_ag_resv_needed(struct xfs_perag *pag,
28 enum xfs_ag_resv_type type);
29
30void xfs_ag_resv_alloc_extent(struct xfs_perag *pag, enum xfs_ag_resv_type type,
31 struct xfs_alloc_arg *args);
32void xfs_ag_resv_free_extent(struct xfs_perag *pag, enum xfs_ag_resv_type type,
33 struct xfs_trans *tp, xfs_extlen_t len);
34
35#endif /* __XFS_AG_RESV_H__ */
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 05b5243d89f6..2620a86a756a 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -37,6 +37,7 @@
37#include "xfs_trans.h" 37#include "xfs_trans.h"
38#include "xfs_buf_item.h" 38#include "xfs_buf_item.h"
39#include "xfs_log.h" 39#include "xfs_log.h"
40#include "xfs_ag_resv.h"
40 41
41struct workqueue_struct *xfs_alloc_wq; 42struct workqueue_struct *xfs_alloc_wq;
42 43
@@ -74,14 +75,8 @@ xfs_prealloc_blocks(
74 * extents need to be actually allocated. To get around this, we explicitly set 75 * extents need to be actually allocated. To get around this, we explicitly set
75 * aside a few blocks which will not be reserved in delayed allocation. 76 * aside a few blocks which will not be reserved in delayed allocation.
76 * 77 *
77 * When rmap is disabled, we need to reserve 4 fsbs _per AG_ for the freelist 78 * We need to reserve 4 fsbs _per AG_ for the freelist and 4 more to handle a
78 * and 4 more to handle a potential split of the file's bmap btree. 79 * potential split of the file's bmap btree.
79 *
80 * When rmap is enabled, we must also be able to handle two rmap btree inserts
81 * to record both the file data extent and a new bmbt block. The bmbt block
82 * might not be in the same AG as the file data extent. In the worst case
83 * the bmap btree splits multiple levels and all the new blocks come from
84 * different AGs, so set aside enough to handle rmap btree splits in all AGs.
85 */ 80 */
86unsigned int 81unsigned int
87xfs_alloc_set_aside( 82xfs_alloc_set_aside(
@@ -90,8 +85,6 @@ xfs_alloc_set_aside(
90 unsigned int blocks; 85 unsigned int blocks;
91 86
92 blocks = 4 + (mp->m_sb.sb_agcount * XFS_ALLOC_AGFL_RESERVE); 87 blocks = 4 + (mp->m_sb.sb_agcount * XFS_ALLOC_AGFL_RESERVE);
93 if (xfs_sb_version_hasrmapbt(&mp->m_sb))
94 blocks += mp->m_sb.sb_agcount * mp->m_rmap_maxlevels;
95 return blocks; 88 return blocks;
96} 89}
97 90
@@ -680,12 +673,29 @@ xfs_alloc_ag_vextent(
680 xfs_alloc_arg_t *args) /* argument structure for allocation */ 673 xfs_alloc_arg_t *args) /* argument structure for allocation */
681{ 674{
682 int error=0; 675 int error=0;
676 xfs_extlen_t reservation;
677 xfs_extlen_t oldmax;
683 678
684 ASSERT(args->minlen > 0); 679 ASSERT(args->minlen > 0);
685 ASSERT(args->maxlen > 0); 680 ASSERT(args->maxlen > 0);
686 ASSERT(args->minlen <= args->maxlen); 681 ASSERT(args->minlen <= args->maxlen);
687 ASSERT(args->mod < args->prod); 682 ASSERT(args->mod < args->prod);
688 ASSERT(args->alignment > 0); 683 ASSERT(args->alignment > 0);
684
685 /*
686 * Clamp maxlen to the amount of free space minus any reservations
687 * that have been made.
688 */
689 oldmax = args->maxlen;
690 reservation = xfs_ag_resv_needed(args->pag, args->resv);
691 if (args->maxlen > args->pag->pagf_freeblks - reservation)
692 args->maxlen = args->pag->pagf_freeblks - reservation;
693 if (args->maxlen == 0) {
694 args->agbno = NULLAGBLOCK;
695 args->maxlen = oldmax;
696 return 0;
697 }
698
689 /* 699 /*
690 * Branch to correct routine based on the type. 700 * Branch to correct routine based on the type.
691 */ 701 */
@@ -705,12 +715,14 @@ xfs_alloc_ag_vextent(
705 /* NOTREACHED */ 715 /* NOTREACHED */
706 } 716 }
707 717
718 args->maxlen = oldmax;
719
708 if (error || args->agbno == NULLAGBLOCK) 720 if (error || args->agbno == NULLAGBLOCK)
709 return error; 721 return error;
710 722
711 ASSERT(args->len >= args->minlen); 723 ASSERT(args->len >= args->minlen);
712 ASSERT(args->len <= args->maxlen); 724 ASSERT(args->len <= args->maxlen);
713 ASSERT(!args->wasfromfl || !args->isfl); 725 ASSERT(!args->wasfromfl || args->resv != XFS_AG_RESV_AGFL);
714 ASSERT(args->agbno % args->alignment == 0); 726 ASSERT(args->agbno % args->alignment == 0);
715 727
716 /* if not file data, insert new block into the reverse map btree */ 728 /* if not file data, insert new block into the reverse map btree */
@@ -732,12 +744,7 @@ xfs_alloc_ag_vextent(
732 args->agbno, args->len)); 744 args->agbno, args->len));
733 } 745 }
734 746
735 if (!args->isfl) { 747 xfs_ag_resv_alloc_extent(args->pag, args->resv, args);
736 xfs_trans_mod_sb(args->tp, args->wasdel ?
737 XFS_TRANS_SB_RES_FDBLOCKS :
738 XFS_TRANS_SB_FDBLOCKS,
739 -((long)(args->len)));
740 }
741 748
742 XFS_STATS_INC(args->mp, xs_allocx); 749 XFS_STATS_INC(args->mp, xs_allocx);
743 XFS_STATS_ADD(args->mp, xs_allocb, args->len); 750 XFS_STATS_ADD(args->mp, xs_allocb, args->len);
@@ -1583,6 +1590,7 @@ xfs_alloc_ag_vextent_small(
1583 int *stat) /* status: 0-freelist, 1-normal/none */ 1590 int *stat) /* status: 0-freelist, 1-normal/none */
1584{ 1591{
1585 struct xfs_owner_info oinfo; 1592 struct xfs_owner_info oinfo;
1593 struct xfs_perag *pag;
1586 int error; 1594 int error;
1587 xfs_agblock_t fbno; 1595 xfs_agblock_t fbno;
1588 xfs_extlen_t flen; 1596 xfs_extlen_t flen;
@@ -1600,7 +1608,8 @@ xfs_alloc_ag_vextent_small(
1600 * to respect minleft even when pulling from the 1608 * to respect minleft even when pulling from the
1601 * freelist. 1609 * freelist.
1602 */ 1610 */
1603 else if (args->minlen == 1 && args->alignment == 1 && !args->isfl && 1611 else if (args->minlen == 1 && args->alignment == 1 &&
1612 args->resv != XFS_AG_RESV_AGFL &&
1604 (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount) 1613 (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount)
1605 > args->minleft)) { 1614 > args->minleft)) {
1606 error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0); 1615 error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0);
@@ -1629,13 +1638,18 @@ xfs_alloc_ag_vextent_small(
1629 /* 1638 /*
1630 * If we're feeding an AGFL block to something that 1639 * If we're feeding an AGFL block to something that
1631 * doesn't live in the free space, we need to clear 1640 * doesn't live in the free space, we need to clear
1632 * out the OWN_AG rmap. 1641 * out the OWN_AG rmap and add the block back to
1642 * the AGFL per-AG reservation.
1633 */ 1643 */
1634 xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_AG); 1644 xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_AG);
1635 error = xfs_rmap_free(args->tp, args->agbp, args->agno, 1645 error = xfs_rmap_free(args->tp, args->agbp, args->agno,
1636 fbno, 1, &oinfo); 1646 fbno, 1, &oinfo);
1637 if (error) 1647 if (error)
1638 goto error0; 1648 goto error0;
1649 pag = xfs_perag_get(args->mp, args->agno);
1650 xfs_ag_resv_free_extent(pag, XFS_AG_RESV_AGFL,
1651 args->tp, 1);
1652 xfs_perag_put(pag);
1639 1653
1640 *stat = 0; 1654 *stat = 0;
1641 return 0; 1655 return 0;
@@ -1683,7 +1697,7 @@ xfs_free_ag_extent(
1683 xfs_agblock_t bno, 1697 xfs_agblock_t bno,
1684 xfs_extlen_t len, 1698 xfs_extlen_t len,
1685 struct xfs_owner_info *oinfo, 1699 struct xfs_owner_info *oinfo,
1686 int isfl) 1700 enum xfs_ag_resv_type type)
1687{ 1701{
1688 xfs_btree_cur_t *bno_cur; /* cursor for by-block btree */ 1702 xfs_btree_cur_t *bno_cur; /* cursor for by-block btree */
1689 xfs_btree_cur_t *cnt_cur; /* cursor for by-size btree */ 1703 xfs_btree_cur_t *cnt_cur; /* cursor for by-size btree */
@@ -1911,21 +1925,22 @@ xfs_free_ag_extent(
1911 */ 1925 */
1912 pag = xfs_perag_get(mp, agno); 1926 pag = xfs_perag_get(mp, agno);
1913 error = xfs_alloc_update_counters(tp, pag, agbp, len); 1927 error = xfs_alloc_update_counters(tp, pag, agbp, len);
1928 xfs_ag_resv_free_extent(pag, type, tp, len);
1914 xfs_perag_put(pag); 1929 xfs_perag_put(pag);
1915 if (error) 1930 if (error)
1916 goto error0; 1931 goto error0;
1917 1932
1918 if (!isfl)
1919 xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len);
1920 XFS_STATS_INC(mp, xs_freex); 1933 XFS_STATS_INC(mp, xs_freex);
1921 XFS_STATS_ADD(mp, xs_freeb, len); 1934 XFS_STATS_ADD(mp, xs_freeb, len);
1922 1935
1923 trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright); 1936 trace_xfs_free_extent(mp, agno, bno, len, type == XFS_AG_RESV_AGFL,
1937 haveleft, haveright);
1924 1938
1925 return 0; 1939 return 0;
1926 1940
1927 error0: 1941 error0:
1928 trace_xfs_free_extent(mp, agno, bno, len, isfl, -1, -1); 1942 trace_xfs_free_extent(mp, agno, bno, len, type == XFS_AG_RESV_AGFL,
1943 -1, -1);
1929 if (bno_cur) 1944 if (bno_cur)
1930 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR); 1945 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
1931 if (cnt_cur) 1946 if (cnt_cur)
@@ -1950,21 +1965,43 @@ xfs_alloc_compute_maxlevels(
1950} 1965}
1951 1966
1952/* 1967/*
1953 * Find the length of the longest extent in an AG. 1968 * Find the length of the longest extent in an AG. The 'need' parameter
1969 * specifies how much space we're going to need for the AGFL and the
1970 * 'reserved' parameter tells us how many blocks in this AG are reserved for
1971 * other callers.
1954 */ 1972 */
1955xfs_extlen_t 1973xfs_extlen_t
1956xfs_alloc_longest_free_extent( 1974xfs_alloc_longest_free_extent(
1957 struct xfs_mount *mp, 1975 struct xfs_mount *mp,
1958 struct xfs_perag *pag, 1976 struct xfs_perag *pag,
1959 xfs_extlen_t need) 1977 xfs_extlen_t need,
1978 xfs_extlen_t reserved)
1960{ 1979{
1961 xfs_extlen_t delta = 0; 1980 xfs_extlen_t delta = 0;
1962 1981
1982 /*
1983 * If the AGFL needs a recharge, we'll have to subtract that from the
1984 * longest extent.
1985 */
1963 if (need > pag->pagf_flcount) 1986 if (need > pag->pagf_flcount)
1964 delta = need - pag->pagf_flcount; 1987 delta = need - pag->pagf_flcount;
1965 1988
1989 /*
1990 * If we cannot maintain others' reservations with space from the
1991 * not-longest freesp extents, we'll have to subtract /that/ from
1992 * the longest extent too.
1993 */
1994 if (pag->pagf_freeblks - pag->pagf_longest < reserved)
1995 delta += reserved - (pag->pagf_freeblks - pag->pagf_longest);
1996
1997 /*
1998 * If the longest extent is long enough to satisfy all the
1999 * reservations and AGFL rules in place, we can return this extent.
2000 */
1966 if (pag->pagf_longest > delta) 2001 if (pag->pagf_longest > delta)
1967 return pag->pagf_longest - delta; 2002 return pag->pagf_longest - delta;
2003
2004 /* Otherwise, let the caller try for 1 block if there's space. */
1968 return pag->pagf_flcount > 0 || pag->pagf_longest > 0; 2005 return pag->pagf_flcount > 0 || pag->pagf_longest > 0;
1969} 2006}
1970 2007
@@ -2004,20 +2041,24 @@ xfs_alloc_space_available(
2004{ 2041{
2005 struct xfs_perag *pag = args->pag; 2042 struct xfs_perag *pag = args->pag;
2006 xfs_extlen_t longest; 2043 xfs_extlen_t longest;
2044 xfs_extlen_t reservation; /* blocks that are still reserved */
2007 int available; 2045 int available;
2008 2046
2009 if (flags & XFS_ALLOC_FLAG_FREEING) 2047 if (flags & XFS_ALLOC_FLAG_FREEING)
2010 return true; 2048 return true;
2011 2049
2050 reservation = xfs_ag_resv_needed(pag, args->resv);
2051
2012 /* do we have enough contiguous free space for the allocation? */ 2052 /* do we have enough contiguous free space for the allocation? */
2013 longest = xfs_alloc_longest_free_extent(args->mp, pag, min_free); 2053 longest = xfs_alloc_longest_free_extent(args->mp, pag, min_free,
2054 reservation);
2014 if ((args->minlen + args->alignment + args->minalignslop - 1) > longest) 2055 if ((args->minlen + args->alignment + args->minalignslop - 1) > longest)
2015 return false; 2056 return false;
2016 2057
2017 /* do have enough free space remaining for the allocation? */ 2058 /* do we have enough free space remaining for the allocation? */
2018 available = (int)(pag->pagf_freeblks + pag->pagf_flcount - 2059 available = (int)(pag->pagf_freeblks + pag->pagf_flcount -
2019 min_free - args->total); 2060 reservation - min_free - args->total);
2020 if (available < (int)args->minleft) 2061 if (available < (int)args->minleft || available <= 0)
2021 return false; 2062 return false;
2022 2063
2023 return true; 2064 return true;
@@ -2124,7 +2165,7 @@ xfs_alloc_fix_freelist(
2124 if (error) 2165 if (error)
2125 goto out_agbp_relse; 2166 goto out_agbp_relse;
2126 error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1, 2167 error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1,
2127 &targs.oinfo, 1); 2168 &targs.oinfo, XFS_AG_RESV_AGFL);
2128 if (error) 2169 if (error)
2129 goto out_agbp_relse; 2170 goto out_agbp_relse;
2130 bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0); 2171 bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0);
@@ -2135,7 +2176,7 @@ xfs_alloc_fix_freelist(
2135 targs.mp = mp; 2176 targs.mp = mp;
2136 targs.agbp = agbp; 2177 targs.agbp = agbp;
2137 targs.agno = args->agno; 2178 targs.agno = args->agno;
2138 targs.alignment = targs.minlen = targs.prod = targs.isfl = 1; 2179 targs.alignment = targs.minlen = targs.prod = 1;
2139 targs.type = XFS_ALLOCTYPE_THIS_AG; 2180 targs.type = XFS_ALLOCTYPE_THIS_AG;
2140 targs.pag = pag; 2181 targs.pag = pag;
2141 error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp); 2182 error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp);
@@ -2146,6 +2187,7 @@ xfs_alloc_fix_freelist(
2146 while (pag->pagf_flcount < need) { 2187 while (pag->pagf_flcount < need) {
2147 targs.agbno = 0; 2188 targs.agbno = 0;
2148 targs.maxlen = need - pag->pagf_flcount; 2189 targs.maxlen = need - pag->pagf_flcount;
2190 targs.resv = XFS_AG_RESV_AGFL;
2149 2191
2150 /* Allocate as many blocks as possible at once. */ 2192 /* Allocate as many blocks as possible at once. */
2151 error = xfs_alloc_ag_vextent(&targs); 2193 error = xfs_alloc_ag_vextent(&targs);
@@ -2825,7 +2867,8 @@ xfs_free_extent(
2825 struct xfs_trans *tp, /* transaction pointer */ 2867 struct xfs_trans *tp, /* transaction pointer */
2826 xfs_fsblock_t bno, /* starting block number of extent */ 2868 xfs_fsblock_t bno, /* starting block number of extent */
2827 xfs_extlen_t len, /* length of extent */ 2869 xfs_extlen_t len, /* length of extent */
2828 struct xfs_owner_info *oinfo) /* extent owner */ 2870 struct xfs_owner_info *oinfo, /* extent owner */
2871 enum xfs_ag_resv_type type) /* block reservation type */
2829{ 2872{
2830 struct xfs_mount *mp = tp->t_mountp; 2873 struct xfs_mount *mp = tp->t_mountp;
2831 struct xfs_buf *agbp; 2874 struct xfs_buf *agbp;
@@ -2834,6 +2877,7 @@ xfs_free_extent(
2834 int error; 2877 int error;
2835 2878
2836 ASSERT(len != 0); 2879 ASSERT(len != 0);
2880 ASSERT(type != XFS_AG_RESV_AGFL);
2837 2881
2838 if (XFS_TEST_ERROR(false, mp, 2882 if (XFS_TEST_ERROR(false, mp,
2839 XFS_ERRTAG_FREE_EXTENT, 2883 XFS_ERRTAG_FREE_EXTENT,
@@ -2851,7 +2895,7 @@ xfs_free_extent(
2851 agbno + len <= be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_length), 2895 agbno + len <= be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_length),
2852 err); 2896 err);
2853 2897
2854 error = xfs_free_ag_extent(tp, agbp, agno, agbno, len, oinfo, 0); 2898 error = xfs_free_ag_extent(tp, agbp, agno, agbno, len, oinfo, type);
2855 if (error) 2899 if (error)
2856 goto err; 2900 goto err;
2857 2901
diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h
index 6fe2d6b7cfe9..f7c520193239 100644
--- a/fs/xfs/libxfs/xfs_alloc.h
+++ b/fs/xfs/libxfs/xfs_alloc.h
@@ -87,10 +87,10 @@ typedef struct xfs_alloc_arg {
87 xfs_alloctype_t otype; /* original allocation type */ 87 xfs_alloctype_t otype; /* original allocation type */
88 char wasdel; /* set if allocation was prev delayed */ 88 char wasdel; /* set if allocation was prev delayed */
89 char wasfromfl; /* set if allocation is from freelist */ 89 char wasfromfl; /* set if allocation is from freelist */
90 char isfl; /* set if is freelist blocks - !acctg */
91 char userdata; /* mask defining userdata treatment */ 90 char userdata; /* mask defining userdata treatment */
92 xfs_fsblock_t firstblock; /* io first block allocated */ 91 xfs_fsblock_t firstblock; /* io first block allocated */
93 struct xfs_owner_info oinfo; /* owner of blocks being allocated */ 92 struct xfs_owner_info oinfo; /* owner of blocks being allocated */
93 enum xfs_ag_resv_type resv; /* block reservation to use */
94} xfs_alloc_arg_t; 94} xfs_alloc_arg_t;
95 95
96/* 96/*
@@ -106,7 +106,8 @@ unsigned int xfs_alloc_set_aside(struct xfs_mount *mp);
106unsigned int xfs_alloc_ag_max_usable(struct xfs_mount *mp); 106unsigned int xfs_alloc_ag_max_usable(struct xfs_mount *mp);
107 107
108xfs_extlen_t xfs_alloc_longest_free_extent(struct xfs_mount *mp, 108xfs_extlen_t xfs_alloc_longest_free_extent(struct xfs_mount *mp,
109 struct xfs_perag *pag, xfs_extlen_t need); 109 struct xfs_perag *pag, xfs_extlen_t need,
110 xfs_extlen_t reserved);
110unsigned int xfs_alloc_min_freelist(struct xfs_mount *mp, 111unsigned int xfs_alloc_min_freelist(struct xfs_mount *mp,
111 struct xfs_perag *pag); 112 struct xfs_perag *pag);
112 113
@@ -184,7 +185,8 @@ xfs_free_extent(
184 struct xfs_trans *tp, /* transaction pointer */ 185 struct xfs_trans *tp, /* transaction pointer */
185 xfs_fsblock_t bno, /* starting block number of extent */ 186 xfs_fsblock_t bno, /* starting block number of extent */
186 xfs_extlen_t len, /* length of extent */ 187 xfs_extlen_t len, /* length of extent */
187 struct xfs_owner_info *oinfo);/* extent owner */ 188 struct xfs_owner_info *oinfo, /* extent owner */
189 enum xfs_ag_resv_type type); /* block reservation type */
188 190
189int /* error */ 191int /* error */
190xfs_alloc_lookup_ge( 192xfs_alloc_lookup_ge(
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index b060bca93402..042d7bf9fb60 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -47,6 +47,7 @@
47#include "xfs_attr_leaf.h" 47#include "xfs_attr_leaf.h"
48#include "xfs_filestream.h" 48#include "xfs_filestream.h"
49#include "xfs_rmap.h" 49#include "xfs_rmap.h"
50#include "xfs_ag_resv.h"
50 51
51 52
52kmem_zone_t *xfs_bmap_free_item_zone; 53kmem_zone_t *xfs_bmap_free_item_zone;
@@ -3501,7 +3502,8 @@ xfs_bmap_longest_free_extent(
3501 } 3502 }
3502 3503
3503 longest = xfs_alloc_longest_free_extent(mp, pag, 3504 longest = xfs_alloc_longest_free_extent(mp, pag,
3504 xfs_alloc_min_freelist(mp, pag)); 3505 xfs_alloc_min_freelist(mp, pag),
3506 xfs_ag_resv_needed(pag, XFS_AG_RESV_NONE));
3505 if (*blen < longest) 3507 if (*blen < longest)
3506 *blen = longest; 3508 *blen = longest;
3507 3509
@@ -3781,7 +3783,7 @@ xfs_bmap_btalloc(
3781 } 3783 }
3782 args.minleft = ap->minleft; 3784 args.minleft = ap->minleft;
3783 args.wasdel = ap->wasdel; 3785 args.wasdel = ap->wasdel;
3784 args.isfl = 0; 3786 args.resv = XFS_AG_RESV_NONE;
3785 args.userdata = ap->userdata; 3787 args.userdata = ap->userdata;
3786 if (ap->userdata & XFS_ALLOC_USERDATA_ZERO) 3788 if (ap->userdata & XFS_ALLOC_USERDATA_ZERO)
3787 args.ip = ap->ip; 3789 args.ip = ap->ip;
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index 08569792fe20..aa1752f918b8 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -2070,7 +2070,7 @@ __xfs_btree_updkeys(
2070 struct xfs_buf *bp0, 2070 struct xfs_buf *bp0,
2071 bool force_all) 2071 bool force_all)
2072{ 2072{
2073 union xfs_btree_bigkey key; /* keys from current level */ 2073 union xfs_btree_key key; /* keys from current level */
2074 union xfs_btree_key *lkey; /* keys from the next level up */ 2074 union xfs_btree_key *lkey; /* keys from the next level up */
2075 union xfs_btree_key *hkey; 2075 union xfs_btree_key *hkey;
2076 union xfs_btree_key *nlkey; /* keys from the next level up */ 2076 union xfs_btree_key *nlkey; /* keys from the next level up */
@@ -2086,7 +2086,7 @@ __xfs_btree_updkeys(
2086 2086
2087 trace_xfs_btree_updkeys(cur, level, bp0); 2087 trace_xfs_btree_updkeys(cur, level, bp0);
2088 2088
2089 lkey = (union xfs_btree_key *)&key; 2089 lkey = &key;
2090 hkey = xfs_btree_high_key_from_key(cur, lkey); 2090 hkey = xfs_btree_high_key_from_key(cur, lkey);
2091 xfs_btree_get_keys(cur, block, lkey); 2091 xfs_btree_get_keys(cur, block, lkey);
2092 for (level++; level < cur->bc_nlevels; level++) { 2092 for (level++; level < cur->bc_nlevels; level++) {
@@ -3226,7 +3226,7 @@ xfs_btree_insrec(
3226 struct xfs_buf *bp; /* buffer for block */ 3226 struct xfs_buf *bp; /* buffer for block */
3227 union xfs_btree_ptr nptr; /* new block ptr */ 3227 union xfs_btree_ptr nptr; /* new block ptr */
3228 struct xfs_btree_cur *ncur; /* new btree cursor */ 3228 struct xfs_btree_cur *ncur; /* new btree cursor */
3229 union xfs_btree_bigkey nkey; /* new block key */ 3229 union xfs_btree_key nkey; /* new block key */
3230 union xfs_btree_key *lkey; 3230 union xfs_btree_key *lkey;
3231 int optr; /* old key/record index */ 3231 int optr; /* old key/record index */
3232 int ptr; /* key/record index */ 3232 int ptr; /* key/record index */
@@ -3241,7 +3241,7 @@ xfs_btree_insrec(
3241 XFS_BTREE_TRACE_ARGIPR(cur, level, *ptrp, &rec); 3241 XFS_BTREE_TRACE_ARGIPR(cur, level, *ptrp, &rec);
3242 3242
3243 ncur = NULL; 3243 ncur = NULL;
3244 lkey = (union xfs_btree_key *)&nkey; 3244 lkey = &nkey;
3245 3245
3246 /* 3246 /*
3247 * If we have an external root pointer, and we've made it to the 3247 * If we have an external root pointer, and we've made it to the
@@ -3444,14 +3444,14 @@ xfs_btree_insert(
3444 union xfs_btree_ptr nptr; /* new block number (split result) */ 3444 union xfs_btree_ptr nptr; /* new block number (split result) */
3445 struct xfs_btree_cur *ncur; /* new cursor (split result) */ 3445 struct xfs_btree_cur *ncur; /* new cursor (split result) */
3446 struct xfs_btree_cur *pcur; /* previous level's cursor */ 3446 struct xfs_btree_cur *pcur; /* previous level's cursor */
3447 union xfs_btree_bigkey bkey; /* key of block to insert */ 3447 union xfs_btree_key bkey; /* key of block to insert */
3448 union xfs_btree_key *key; 3448 union xfs_btree_key *key;
3449 union xfs_btree_rec rec; /* record to insert */ 3449 union xfs_btree_rec rec; /* record to insert */
3450 3450
3451 level = 0; 3451 level = 0;
3452 ncur = NULL; 3452 ncur = NULL;
3453 pcur = cur; 3453 pcur = cur;
3454 key = (union xfs_btree_key *)&bkey; 3454 key = &bkey;
3455 3455
3456 xfs_btree_set_ptr_null(cur, &nptr); 3456 xfs_btree_set_ptr_null(cur, &nptr);
3457 3457
@@ -4797,3 +4797,50 @@ xfs_btree_query_range(
4797 return xfs_btree_overlapped_query_range(cur, &low_key, &high_key, 4797 return xfs_btree_overlapped_query_range(cur, &low_key, &high_key,
4798 fn, priv); 4798 fn, priv);
4799} 4799}
4800
4801/*
4802 * Calculate the number of blocks needed to store a given number of records
4803 * in a short-format (per-AG metadata) btree.
4804 */
4805xfs_extlen_t
4806xfs_btree_calc_size(
4807 struct xfs_mount *mp,
4808 uint *limits,
4809 unsigned long long len)
4810{
4811 int level;
4812 int maxrecs;
4813 xfs_extlen_t rval;
4814
4815 maxrecs = limits[0];
4816 for (level = 0, rval = 0; len > 1; level++) {
4817 len += maxrecs - 1;
4818 do_div(len, maxrecs);
4819 maxrecs = limits[1];
4820 rval += len;
4821 }
4822 return rval;
4823}
4824
4825int
4826xfs_btree_count_blocks_helper(
4827 struct xfs_btree_cur *cur,
4828 int level,
4829 void *data)
4830{
4831 xfs_extlen_t *blocks = data;
4832 (*blocks)++;
4833
4834 return 0;
4835}
4836
4837/* Count the blocks in a btree and return the result in *blocks. */
4838int
4839xfs_btree_count_blocks(
4840 struct xfs_btree_cur *cur,
4841 xfs_extlen_t *blocks)
4842{
4843 *blocks = 0;
4844 return xfs_btree_visit_blocks(cur, xfs_btree_count_blocks_helper,
4845 blocks);
4846}
diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
index 04d0865e5e6d..3f8556a5c2ad 100644
--- a/fs/xfs/libxfs/xfs_btree.h
+++ b/fs/xfs/libxfs/xfs_btree.h
@@ -37,30 +37,18 @@ union xfs_btree_ptr {
37 __be64 l; /* long form ptr */ 37 __be64 l; /* long form ptr */
38}; 38};
39 39
40union xfs_btree_key {
41 struct xfs_bmbt_key bmbt;
42 xfs_bmdr_key_t bmbr; /* bmbt root block */
43 xfs_alloc_key_t alloc;
44 struct xfs_inobt_key inobt;
45 struct xfs_rmap_key rmap;
46};
47
48/* 40/*
49 * In-core key that holds both low and high keys for overlapped btrees. 41 * The in-core btree key. Overlapping btrees actually store two keys
50 * The two keys are packed next to each other on disk, so do the same 42 * per pointer, so we reserve enough memory to hold both. The __*bigkey
51 * in memory. Preserve the existing xfs_btree_key as a single key to 43 * items should never be accessed directly.
52 * avoid the mental model breakage that would happen if we passed a
53 * bigkey into a function that operates on a single key.
54 */ 44 */
55union xfs_btree_bigkey { 45union xfs_btree_key {
56 struct xfs_bmbt_key bmbt; 46 struct xfs_bmbt_key bmbt;
57 xfs_bmdr_key_t bmbr; /* bmbt root block */ 47 xfs_bmdr_key_t bmbr; /* bmbt root block */
58 xfs_alloc_key_t alloc; 48 xfs_alloc_key_t alloc;
59 struct xfs_inobt_key inobt; 49 struct xfs_inobt_key inobt;
60 struct { 50 struct xfs_rmap_key rmap;
61 struct xfs_rmap_key rmap; 51 struct xfs_rmap_key __rmap_bigkey[2];
62 struct xfs_rmap_key rmap_hi;
63 };
64}; 52};
65 53
66union xfs_btree_rec { 54union xfs_btree_rec {
@@ -513,6 +501,8 @@ bool xfs_btree_sblock_v5hdr_verify(struct xfs_buf *bp);
513bool xfs_btree_sblock_verify(struct xfs_buf *bp, unsigned int max_recs); 501bool xfs_btree_sblock_verify(struct xfs_buf *bp, unsigned int max_recs);
514uint xfs_btree_compute_maxlevels(struct xfs_mount *mp, uint *limits, 502uint xfs_btree_compute_maxlevels(struct xfs_mount *mp, uint *limits,
515 unsigned long len); 503 unsigned long len);
504xfs_extlen_t xfs_btree_calc_size(struct xfs_mount *mp, uint *limits,
505 unsigned long long len);
516 506
517/* return codes */ 507/* return codes */
518#define XFS_BTREE_QUERY_RANGE_CONTINUE 0 /* keep iterating */ 508#define XFS_BTREE_QUERY_RANGE_CONTINUE 0 /* keep iterating */
@@ -529,4 +519,6 @@ typedef int (*xfs_btree_visit_blocks_fn)(struct xfs_btree_cur *cur, int level,
529int xfs_btree_visit_blocks(struct xfs_btree_cur *cur, 519int xfs_btree_visit_blocks(struct xfs_btree_cur *cur,
530 xfs_btree_visit_blocks_fn fn, void *data); 520 xfs_btree_visit_blocks_fn fn, void *data);
531 521
522int xfs_btree_count_blocks(struct xfs_btree_cur *cur, xfs_extlen_t *blocks);
523
532#endif /* __XFS_BTREE_H__ */ 524#endif /* __XFS_BTREE_H__ */
diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
index c221d0ecd52e..613c5cf19436 100644
--- a/fs/xfs/libxfs/xfs_defer.c
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -81,6 +81,10 @@
81 * - For each work item attached to the log intent item, 81 * - For each work item attached to the log intent item,
82 * * Perform the described action. 82 * * Perform the described action.
83 * * Attach the work item to the log done item. 83 * * Attach the work item to the log done item.
84 * * If the result of doing the work was -EAGAIN, ->finish_item
85 * wants a new transaction. See the "Requesting a Fresh
86 * Transaction while Finishing Deferred Work" section below for
87 * details.
84 * 88 *
85 * The key here is that we must log an intent item for all pending 89 * The key here is that we must log an intent item for all pending
86 * work items every time we roll the transaction, and that we must log 90 * work items every time we roll the transaction, and that we must log
@@ -88,6 +92,34 @@
88 * we can perform complex remapping operations, chaining intent items 92 * we can perform complex remapping operations, chaining intent items
89 * as needed. 93 * as needed.
90 * 94 *
95 * Requesting a Fresh Transaction while Finishing Deferred Work
96 *
97 * If ->finish_item decides that it needs a fresh transaction to
98 * finish the work, it must ask its caller (xfs_defer_finish) for a
99 * continuation. The most likely cause of this circumstance is the
100 * refcount adjust functions deciding that they've logged enough items
101 * to be at risk of exceeding the transaction reservation.
102 *
103 * To get a fresh transaction, we want to log the existing log done
104 * item to prevent the log intent item from replaying, immediately log
105 * a new log intent item with the unfinished work items, roll the
106 * transaction, and re-call ->finish_item wherever it left off. The
107 * log done item and the new log intent item must be in the same
108 * transaction or atomicity cannot be guaranteed; defer_finish ensures
109 * that this happens.
110 *
111 * This requires some coordination between ->finish_item and
112 * defer_finish. Upon deciding to request a new transaction,
113 * ->finish_item should update the current work item to reflect the
114 * unfinished work. Next, it should reset the log done item's list
115 * count to the number of items finished, and return -EAGAIN.
116 * defer_finish sees the -EAGAIN, logs the new log intent item
117 * with the remaining work items, and leaves the xfs_defer_pending
118 * item at the head of the dop_work queue. Then it rolls the
119 * transaction and picks up processing where it left off. Note that
120 * ->finish_item must be careful to leave enough
121 * transaction reservation to fit the new log intent item.
122 *
91 * This is an example of remapping the extent (E, E+B) into file X at 123 * This is an example of remapping the extent (E, E+B) into file X at
92 * offset A and dealing with the extent (C, C+B) already being mapped 124 * offset A and dealing with the extent (C, C+B) already being mapped
93 * there: 125 * there:
@@ -104,21 +136,26 @@
104 * | Intent to add rmap (X, E, A, B) | 136 * | Intent to add rmap (X, E, A, B) |
105 * +-------------------------------------------------+ 137 * +-------------------------------------------------+
106 * | Reduce refcount for extent (C, B) | t2 138 * | Reduce refcount for extent (C, B) | t2
107 * | Done reducing refcount for extent (C, B) | 139 * | Done reducing refcount for extent (C, 9) |
140 * | Intent to reduce refcount for extent (C+9, B-9) |
141 * | (ran out of space after 9 refcount updates) |
142 * +-------------------------------------------------+
143 * | Reduce refcount for extent (C+9, B-9) | t3
144 * | Done reducing refcount for extent (C+9, B-9) |
108 * | Increase refcount for extent (E, B) | 145 * | Increase refcount for extent (E, B) |
109 * | Done increasing refcount for extent (E, B) | 146 * | Done increasing refcount for extent (E, B) |
110 * | Intent to free extent (C, B) | 147 * | Intent to free extent (C, B) |
111 * | Intent to free extent (F, 1) (refcountbt block) | 148 * | Intent to free extent (F, 1) (refcountbt block) |
112 * | Intent to remove rmap (F, 1, REFC) | 149 * | Intent to remove rmap (F, 1, REFC) |
113 * +-------------------------------------------------+ 150 * +-------------------------------------------------+
114 * | Remove rmap (X, C, A, B) | t3 151 * | Remove rmap (X, C, A, B) | t4
115 * | Done removing rmap (X, C, A, B) | 152 * | Done removing rmap (X, C, A, B) |
116 * | Add rmap (X, E, A, B) | 153 * | Add rmap (X, E, A, B) |
117 * | Done adding rmap (X, E, A, B) | 154 * | Done adding rmap (X, E, A, B) |
118 * | Remove rmap (F, 1, REFC) | 155 * | Remove rmap (F, 1, REFC) |
119 * | Done removing rmap (F, 1, REFC) | 156 * | Done removing rmap (F, 1, REFC) |
120 * +-------------------------------------------------+ 157 * +-------------------------------------------------+
121 * | Free extent (C, B) | t4 158 * | Free extent (C, B) | t5
122 * | Done freeing extent (C, B) | 159 * | Done freeing extent (C, B) |
123 * | Free extent (D, 1) | 160 * | Free extent (D, 1) |
124 * | Done freeing extent (D, 1) | 161 * | Done freeing extent (D, 1) |
@@ -141,6 +178,9 @@
141 * - Intent to free extent (C, B) 178 * - Intent to free extent (C, B)
142 * - Intent to free extent (F, 1) (refcountbt block) 179 * - Intent to free extent (F, 1) (refcountbt block)
143 * - Intent to remove rmap (F, 1, REFC) 180 * - Intent to remove rmap (F, 1, REFC)
181 *
182 * Note that the continuation requested between t2 and t3 is likely to
183 * reoccur.
144 */ 184 */
145 185
146static const struct xfs_defer_op_type *defer_op_types[XFS_DEFER_OPS_TYPE_MAX]; 186static const struct xfs_defer_op_type *defer_op_types[XFS_DEFER_OPS_TYPE_MAX];
@@ -323,7 +363,16 @@ xfs_defer_finish(
323 dfp->dfp_count--; 363 dfp->dfp_count--;
324 error = dfp->dfp_type->finish_item(*tp, dop, li, 364 error = dfp->dfp_type->finish_item(*tp, dop, li,
325 dfp->dfp_done, &state); 365 dfp->dfp_done, &state);
326 if (error) { 366 if (error == -EAGAIN) {
367 /*
368 * Caller wants a fresh transaction;
369 * put the work item back on the list
370 * and jump out.
371 */
372 list_add(li, &dfp->dfp_work);
373 dfp->dfp_count++;
374 break;
375 } else if (error) {
327 /* 376 /*
328 * Clean up after ourselves and jump out. 377 * Clean up after ourselves and jump out.
329 * xfs_defer_cancel will take care of freeing 378 * xfs_defer_cancel will take care of freeing
@@ -335,9 +384,25 @@ xfs_defer_finish(
335 goto out; 384 goto out;
336 } 385 }
337 } 386 }
338 /* Done with the dfp, free it. */ 387 if (error == -EAGAIN) {
339 list_del(&dfp->dfp_list); 388 /*
340 kmem_free(dfp); 389 * Caller wants a fresh transaction, so log a
390 * new log intent item to replace the old one
391 * and roll the transaction. See "Requesting
392 * a Fresh Transaction while Finishing
393 * Deferred Work" above.
394 */
395 dfp->dfp_intent = dfp->dfp_type->create_intent(*tp,
396 dfp->dfp_count);
397 dfp->dfp_done = NULL;
398 list_for_each(li, &dfp->dfp_work)
399 dfp->dfp_type->log_item(*tp, dfp->dfp_intent,
400 li);
401 } else {
402 /* Done with the dfp, free it. */
403 list_del(&dfp->dfp_list);
404 kmem_free(dfp);
405 }
341 406
342 if (cleanup_fn) 407 if (cleanup_fn)
343 cleanup_fn(*tp, state, error); 408 cleanup_fn(*tp, state, error);
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c
index 31ca2208c03d..eab68ae2e011 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.c
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
@@ -132,7 +132,7 @@ xfs_inobt_free_block(
132 xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INOBT); 132 xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INOBT);
133 return xfs_free_extent(cur->bc_tp, 133 return xfs_free_extent(cur->bc_tp,
134 XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(bp)), 1, 134 XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(bp)), 1,
135 &oinfo); 135 &oinfo, XFS_AG_RESV_NONE);
136} 136}
137 137
138STATIC int 138STATIC int
diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h
index a6eed43fa7cd..fc5eef85d61e 100644
--- a/fs/xfs/libxfs/xfs_log_format.h
+++ b/fs/xfs/libxfs/xfs_log_format.h
@@ -647,9 +647,17 @@ struct xfs_rui_log_format {
647 __uint16_t rui_size; /* size of this item */ 647 __uint16_t rui_size; /* size of this item */
648 __uint32_t rui_nextents; /* # extents to free */ 648 __uint32_t rui_nextents; /* # extents to free */
649 __uint64_t rui_id; /* rui identifier */ 649 __uint64_t rui_id; /* rui identifier */
650 struct xfs_map_extent rui_extents[1]; /* array of extents to rmap */ 650 struct xfs_map_extent rui_extents[]; /* array of extents to rmap */
651}; 651};
652 652
653static inline size_t
654xfs_rui_log_format_sizeof(
655 unsigned int nr)
656{
657 return sizeof(struct xfs_rui_log_format) +
658 nr * sizeof(struct xfs_map_extent);
659}
660
653/* 661/*
654 * This is the structure used to lay out an rud log item in the 662 * This is the structure used to lay out an rud log item in the
655 * log. The rud_extents array is a variable size array whose 663 * log. The rud_extents array is a variable size array whose
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index 4a33a3304369..c8005fdaaa8a 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -30,6 +30,7 @@
30#include "xfs_mru_cache.h" 30#include "xfs_mru_cache.h"
31#include "xfs_filestream.h" 31#include "xfs_filestream.h"
32#include "xfs_trace.h" 32#include "xfs_trace.h"
33#include "xfs_ag_resv.h"
33 34
34struct xfs_fstrm_item { 35struct xfs_fstrm_item {
35 struct xfs_mru_cache_elem mru; 36 struct xfs_mru_cache_elem mru;
@@ -198,7 +199,8 @@ xfs_filestream_pick_ag(
198 } 199 }
199 200
200 longest = xfs_alloc_longest_free_extent(mp, pag, 201 longest = xfs_alloc_longest_free_extent(mp, pag,
201 xfs_alloc_min_freelist(mp, pag)); 202 xfs_alloc_min_freelist(mp, pag),
203 xfs_ag_resv_needed(pag, XFS_AG_RESV_NONE));
202 if (((minlen && longest >= minlen) || 204 if (((minlen && longest >= minlen) ||
203 (!minlen && pag->pagf_freeblks >= minfree)) && 205 (!minlen && pag->pagf_freeblks >= minfree)) &&
204 (!pag->pagf_metadata || !(flags & XFS_PICK_USERDATA) || 206 (!pag->pagf_metadata || !(flags & XFS_PICK_USERDATA) ||
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 0b7f986745c1..94ac06f3d908 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -553,7 +553,7 @@ xfs_growfs_data_private(
553 error = xfs_free_extent(tp, 553 error = xfs_free_extent(tp,
554 XFS_AGB_TO_FSB(mp, agno, 554 XFS_AGB_TO_FSB(mp, agno,
555 be32_to_cpu(agf->agf_length) - new), 555 be32_to_cpu(agf->agf_length) - new),
556 new, &oinfo); 556 new, &oinfo, XFS_AG_RESV_NONE);
557 if (error) 557 if (error)
558 goto error0; 558 goto error0;
559 } 559 }
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index efd4a5526f37..041d9493e798 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -331,6 +331,22 @@ xfs_mp_fail_writes(struct xfs_mount *mp)
331} 331}
332#endif 332#endif
333 333
334/* per-AG block reservation data structures */
335enum xfs_ag_resv_type {
336 XFS_AG_RESV_NONE = 0,
337 XFS_AG_RESV_METADATA,
338 XFS_AG_RESV_AGFL,
339};
340
341struct xfs_ag_resv {
342 /* number of blocks originally reserved here */
343 xfs_extlen_t ar_orig_reserved;
344 /* number of blocks reserved here */
345 xfs_extlen_t ar_reserved;
346 /* number of blocks originally asked for */
347 xfs_extlen_t ar_asked;
348};
349
334/* 350/*
335 * Per-ag incore structure, copies of information in agf and agi, to improve the 351 * Per-ag incore structure, copies of information in agf and agi, to improve the
336 * performance of allocation group selection. 352 * performance of allocation group selection.
@@ -378,8 +394,28 @@ typedef struct xfs_perag {
378 /* for rcu-safe freeing */ 394 /* for rcu-safe freeing */
379 struct rcu_head rcu_head; 395 struct rcu_head rcu_head;
380 int pagb_count; /* pagb slots in use */ 396 int pagb_count; /* pagb slots in use */
397
398 /* Blocks reserved for all kinds of metadata. */
399 struct xfs_ag_resv pag_meta_resv;
400 /* Blocks reserved for just AGFL-based metadata. */
401 struct xfs_ag_resv pag_agfl_resv;
381} xfs_perag_t; 402} xfs_perag_t;
382 403
404static inline struct xfs_ag_resv *
405xfs_perag_resv(
406 struct xfs_perag *pag,
407 enum xfs_ag_resv_type type)
408{
409 switch (type) {
410 case XFS_AG_RESV_METADATA:
411 return &pag->pag_meta_resv;
412 case XFS_AG_RESV_AGFL:
413 return &pag->pag_agfl_resv;
414 default:
415 return NULL;
416 }
417}
418
383extern void xfs_uuid_table_free(void); 419extern void xfs_uuid_table_free(void);
384extern int xfs_log_sbcount(xfs_mount_t *); 420extern int xfs_log_sbcount(xfs_mount_t *);
385extern __uint64_t xfs_default_resblks(xfs_mount_t *mp); 421extern __uint64_t xfs_default_resblks(xfs_mount_t *mp);
diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c
index 2500f28689d5..0432a459871c 100644
--- a/fs/xfs/xfs_rmap_item.c
+++ b/fs/xfs/xfs_rmap_item.c
@@ -51,28 +51,16 @@ xfs_rui_item_free(
51 kmem_zone_free(xfs_rui_zone, ruip); 51 kmem_zone_free(xfs_rui_zone, ruip);
52} 52}
53 53
54/*
55 * This returns the number of iovecs needed to log the given rui item.
56 * We only need 1 iovec for an rui item. It just logs the rui_log_format
57 * structure.
58 */
59static inline int
60xfs_rui_item_sizeof(
61 struct xfs_rui_log_item *ruip)
62{
63 return sizeof(struct xfs_rui_log_format) +
64 (ruip->rui_format.rui_nextents - 1) *
65 sizeof(struct xfs_map_extent);
66}
67
68STATIC void 54STATIC void
69xfs_rui_item_size( 55xfs_rui_item_size(
70 struct xfs_log_item *lip, 56 struct xfs_log_item *lip,
71 int *nvecs, 57 int *nvecs,
72 int *nbytes) 58 int *nbytes)
73{ 59{
60 struct xfs_rui_log_item *ruip = RUI_ITEM(lip);
61
74 *nvecs += 1; 62 *nvecs += 1;
75 *nbytes += xfs_rui_item_sizeof(RUI_ITEM(lip)); 63 *nbytes += xfs_rui_log_format_sizeof(ruip->rui_format.rui_nextents);
76} 64}
77 65
78/* 66/*
@@ -97,7 +85,7 @@ xfs_rui_item_format(
97 ruip->rui_format.rui_size = 1; 85 ruip->rui_format.rui_size = 1;
98 86
99 xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_RUI_FORMAT, &ruip->rui_format, 87 xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_RUI_FORMAT, &ruip->rui_format,
100 xfs_rui_item_sizeof(ruip)); 88 xfs_rui_log_format_sizeof(ruip->rui_format.rui_nextents));
101} 89}
102 90
103/* 91/*
@@ -205,16 +193,12 @@ xfs_rui_init(
205 193
206{ 194{
207 struct xfs_rui_log_item *ruip; 195 struct xfs_rui_log_item *ruip;
208 uint size;
209 196
210 ASSERT(nextents > 0); 197 ASSERT(nextents > 0);
211 if (nextents > XFS_RUI_MAX_FAST_EXTENTS) { 198 if (nextents > XFS_RUI_MAX_FAST_EXTENTS)
212 size = (uint)(sizeof(struct xfs_rui_log_item) + 199 ruip = kmem_zalloc(xfs_rui_log_item_sizeof(nextents), KM_SLEEP);
213 ((nextents - 1) * sizeof(struct xfs_map_extent))); 200 else
214 ruip = kmem_zalloc(size, KM_SLEEP);
215 } else {
216 ruip = kmem_zone_zalloc(xfs_rui_zone, KM_SLEEP); 201 ruip = kmem_zone_zalloc(xfs_rui_zone, KM_SLEEP);
217 }
218 202
219 xfs_log_item_init(mp, &ruip->rui_item, XFS_LI_RUI, &xfs_rui_item_ops); 203 xfs_log_item_init(mp, &ruip->rui_item, XFS_LI_RUI, &xfs_rui_item_ops);
220 ruip->rui_format.rui_nextents = nextents; 204 ruip->rui_format.rui_nextents = nextents;
@@ -239,14 +223,12 @@ xfs_rui_copy_format(
239 uint len; 223 uint len;
240 224
241 src_rui_fmt = buf->i_addr; 225 src_rui_fmt = buf->i_addr;
242 len = sizeof(struct xfs_rui_log_format) + 226 len = xfs_rui_log_format_sizeof(src_rui_fmt->rui_nextents);
243 (src_rui_fmt->rui_nextents - 1) *
244 sizeof(struct xfs_map_extent);
245 227
246 if (buf->i_len != len) 228 if (buf->i_len != len)
247 return -EFSCORRUPTED; 229 return -EFSCORRUPTED;
248 230
249 memcpy((char *)dst_rui_fmt, (char *)src_rui_fmt, len); 231 memcpy(dst_rui_fmt, src_rui_fmt, len);
250 return 0; 232 return 0;
251} 233}
252 234
diff --git a/fs/xfs/xfs_rmap_item.h b/fs/xfs/xfs_rmap_item.h
index aefcc3a318a5..340c968e1f9c 100644
--- a/fs/xfs/xfs_rmap_item.h
+++ b/fs/xfs/xfs_rmap_item.h
@@ -70,6 +70,14 @@ struct xfs_rui_log_item {
70 struct xfs_rui_log_format rui_format; 70 struct xfs_rui_log_format rui_format;
71}; 71};
72 72
73static inline size_t
74xfs_rui_log_item_sizeof(
75 unsigned int nr)
76{
77 return offsetof(struct xfs_rui_log_item, rui_format) +
78 xfs_rui_log_format_sizeof(nr);
79}
80
73/* 81/*
74 * This is the "rmap update done" log item. It is used to log the fact that 82 * This is the "rmap update done" log item. It is used to log the fact that
75 * some rmapbt updates mentioned in an earlier rui item have been performed. 83 * some rmapbt updates mentioned in an earlier rui item have been performed.
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index fd6be45b3a1e..340975392e91 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1782,9 +1782,8 @@ xfs_init_zones(void)
1782 if (!xfs_rud_zone) 1782 if (!xfs_rud_zone)
1783 goto out_destroy_icreate_zone; 1783 goto out_destroy_icreate_zone;
1784 1784
1785 xfs_rui_zone = kmem_zone_init((sizeof(struct xfs_rui_log_item) + 1785 xfs_rui_zone = kmem_zone_init(
1786 ((XFS_RUI_MAX_FAST_EXTENTS - 1) * 1786 xfs_rui_log_item_sizeof(XFS_RUI_MAX_FAST_EXTENTS),
1787 sizeof(struct xfs_map_extent))),
1788 "xfs_rui_item"); 1787 "xfs_rui_item");
1789 if (!xfs_rui_zone) 1788 if (!xfs_rui_zone)
1790 goto out_destroy_rud_zone; 1789 goto out_destroy_rud_zone;
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index d303a665dba9..c2a875fcf26e 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -1570,14 +1570,15 @@ TRACE_EVENT(xfs_agf,
1570 1570
1571TRACE_EVENT(xfs_free_extent, 1571TRACE_EVENT(xfs_free_extent,
1572 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno, 1572 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno,
1573 xfs_extlen_t len, bool isfl, int haveleft, int haveright), 1573 xfs_extlen_t len, enum xfs_ag_resv_type resv, int haveleft,
1574 TP_ARGS(mp, agno, agbno, len, isfl, haveleft, haveright), 1574 int haveright),
1575 TP_ARGS(mp, agno, agbno, len, resv, haveleft, haveright),
1575 TP_STRUCT__entry( 1576 TP_STRUCT__entry(
1576 __field(dev_t, dev) 1577 __field(dev_t, dev)
1577 __field(xfs_agnumber_t, agno) 1578 __field(xfs_agnumber_t, agno)
1578 __field(xfs_agblock_t, agbno) 1579 __field(xfs_agblock_t, agbno)
1579 __field(xfs_extlen_t, len) 1580 __field(xfs_extlen_t, len)
1580 __field(int, isfl) 1581 __field(int, resv)
1581 __field(int, haveleft) 1582 __field(int, haveleft)
1582 __field(int, haveright) 1583 __field(int, haveright)
1583 ), 1584 ),
@@ -1586,16 +1587,16 @@ TRACE_EVENT(xfs_free_extent,
1586 __entry->agno = agno; 1587 __entry->agno = agno;
1587 __entry->agbno = agbno; 1588 __entry->agbno = agbno;
1588 __entry->len = len; 1589 __entry->len = len;
1589 __entry->isfl = isfl; 1590 __entry->resv = resv;
1590 __entry->haveleft = haveleft; 1591 __entry->haveleft = haveleft;
1591 __entry->haveright = haveright; 1592 __entry->haveright = haveright;
1592 ), 1593 ),
1593 TP_printk("dev %d:%d agno %u agbno %u len %u isfl %d %s", 1594 TP_printk("dev %d:%d agno %u agbno %u len %u resv %d %s",
1594 MAJOR(__entry->dev), MINOR(__entry->dev), 1595 MAJOR(__entry->dev), MINOR(__entry->dev),
1595 __entry->agno, 1596 __entry->agno,
1596 __entry->agbno, 1597 __entry->agbno,
1597 __entry->len, 1598 __entry->len,
1598 __entry->isfl, 1599 __entry->resv,
1599 __entry->haveleft ? 1600 __entry->haveleft ?
1600 (__entry->haveright ? "both" : "left") : 1601 (__entry->haveright ? "both" : "left") :
1601 (__entry->haveright ? "right" : "none")) 1602 (__entry->haveright ? "right" : "none"))
@@ -1622,7 +1623,7 @@ DECLARE_EVENT_CLASS(xfs_alloc_class,
1622 __field(short, otype) 1623 __field(short, otype)
1623 __field(char, wasdel) 1624 __field(char, wasdel)
1624 __field(char, wasfromfl) 1625 __field(char, wasfromfl)
1625 __field(char, isfl) 1626 __field(int, resv)
1626 __field(char, userdata) 1627 __field(char, userdata)
1627 __field(xfs_fsblock_t, firstblock) 1628 __field(xfs_fsblock_t, firstblock)
1628 ), 1629 ),
@@ -1643,13 +1644,13 @@ DECLARE_EVENT_CLASS(xfs_alloc_class,
1643 __entry->otype = args->otype; 1644 __entry->otype = args->otype;
1644 __entry->wasdel = args->wasdel; 1645 __entry->wasdel = args->wasdel;
1645 __entry->wasfromfl = args->wasfromfl; 1646 __entry->wasfromfl = args->wasfromfl;
1646 __entry->isfl = args->isfl; 1647 __entry->resv = args->resv;
1647 __entry->userdata = args->userdata; 1648 __entry->userdata = args->userdata;
1648 __entry->firstblock = args->firstblock; 1649 __entry->firstblock = args->firstblock;
1649 ), 1650 ),
1650 TP_printk("dev %d:%d agno %u agbno %u minlen %u maxlen %u mod %u " 1651 TP_printk("dev %d:%d agno %u agbno %u minlen %u maxlen %u mod %u "
1651 "prod %u minleft %u total %u alignment %u minalignslop %u " 1652 "prod %u minleft %u total %u alignment %u minalignslop %u "
1652 "len %u type %s otype %s wasdel %d wasfromfl %d isfl %d " 1653 "len %u type %s otype %s wasdel %d wasfromfl %d resv %d "
1653 "userdata %d firstblock 0x%llx", 1654 "userdata %d firstblock 0x%llx",
1654 MAJOR(__entry->dev), MINOR(__entry->dev), 1655 MAJOR(__entry->dev), MINOR(__entry->dev),
1655 __entry->agno, 1656 __entry->agno,
@@ -1667,7 +1668,7 @@ DECLARE_EVENT_CLASS(xfs_alloc_class,
1667 __print_symbolic(__entry->otype, XFS_ALLOC_TYPES), 1668 __print_symbolic(__entry->otype, XFS_ALLOC_TYPES),
1668 __entry->wasdel, 1669 __entry->wasdel,
1669 __entry->wasfromfl, 1670 __entry->wasfromfl,
1670 __entry->isfl, 1671 __entry->resv,
1671 __entry->userdata, 1672 __entry->userdata,
1672 (unsigned long long)__entry->firstblock) 1673 (unsigned long long)__entry->firstblock)
1673) 1674)
@@ -2558,6 +2559,60 @@ DEFINE_RMAPBT_EVENT(xfs_rmap_lookup_le_range_result);
2558DEFINE_RMAPBT_EVENT(xfs_rmap_find_right_neighbor_result); 2559DEFINE_RMAPBT_EVENT(xfs_rmap_find_right_neighbor_result);
2559DEFINE_RMAPBT_EVENT(xfs_rmap_find_left_neighbor_result); 2560DEFINE_RMAPBT_EVENT(xfs_rmap_find_left_neighbor_result);
2560 2561
2562/* per-AG reservation */
2563DECLARE_EVENT_CLASS(xfs_ag_resv_class,
2564 TP_PROTO(struct xfs_perag *pag, enum xfs_ag_resv_type resv,
2565 xfs_extlen_t len),
2566 TP_ARGS(pag, resv, len),
2567 TP_STRUCT__entry(
2568 __field(dev_t, dev)
2569 __field(xfs_agnumber_t, agno)
2570 __field(int, resv)
2571 __field(xfs_extlen_t, freeblks)
2572 __field(xfs_extlen_t, flcount)
2573 __field(xfs_extlen_t, reserved)
2574 __field(xfs_extlen_t, asked)
2575 __field(xfs_extlen_t, len)
2576 ),
2577 TP_fast_assign(
2578 struct xfs_ag_resv *r = xfs_perag_resv(pag, resv);
2579
2580 __entry->dev = pag->pag_mount->m_super->s_dev;
2581 __entry->agno = pag->pag_agno;
2582 __entry->resv = resv;
2583 __entry->freeblks = pag->pagf_freeblks;
2584 __entry->flcount = pag->pagf_flcount;
2585 __entry->reserved = r ? r->ar_reserved : 0;
2586 __entry->asked = r ? r->ar_asked : 0;
2587 __entry->len = len;
2588 ),
2589 TP_printk("dev %d:%d agno %u resv %d freeblks %u flcount %u resv %u ask %u len %u\n",
2590 MAJOR(__entry->dev), MINOR(__entry->dev),
2591 __entry->agno,
2592 __entry->resv,
2593 __entry->freeblks,
2594 __entry->flcount,
2595 __entry->reserved,
2596 __entry->asked,
2597 __entry->len)
2598)
2599#define DEFINE_AG_RESV_EVENT(name) \
2600DEFINE_EVENT(xfs_ag_resv_class, name, \
2601 TP_PROTO(struct xfs_perag *pag, enum xfs_ag_resv_type type, \
2602 xfs_extlen_t len), \
2603 TP_ARGS(pag, type, len))
2604
2605/* per-AG reservation tracepoints */
2606DEFINE_AG_RESV_EVENT(xfs_ag_resv_init);
2607DEFINE_AG_RESV_EVENT(xfs_ag_resv_free);
2608DEFINE_AG_RESV_EVENT(xfs_ag_resv_alloc_extent);
2609DEFINE_AG_RESV_EVENT(xfs_ag_resv_free_extent);
2610DEFINE_AG_RESV_EVENT(xfs_ag_resv_critical);
2611DEFINE_AG_RESV_EVENT(xfs_ag_resv_needed);
2612
2613DEFINE_AG_ERROR_EVENT(xfs_ag_resv_free_error);
2614DEFINE_AG_ERROR_EVENT(xfs_ag_resv_init_error);
2615
2561#endif /* _TRACE_XFS_H */ 2616#endif /* _TRACE_XFS_H */
2562 2617
2563#undef TRACE_INCLUDE_PATH 2618#undef TRACE_INCLUDE_PATH
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 836eb807aa88..70f42ea86dfb 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -318,7 +318,6 @@ xfs_trans_mod_sb(
318 * in-core superblock's counter. This should only 318 * in-core superblock's counter. This should only
319 * be applied to the on-disk superblock. 319 * be applied to the on-disk superblock.
320 */ 320 */
321 ASSERT(delta < 0);
322 tp->t_res_fdblocks_delta += delta; 321 tp->t_res_fdblocks_delta += delta;
323 if (xfs_sb_version_haslazysbcount(&mp->m_sb)) 322 if (xfs_sb_version_haslazysbcount(&mp->m_sb))
324 flags &= ~XFS_TRANS_SB_DIRTY; 323 flags &= ~XFS_TRANS_SB_DIRTY;
diff --git a/fs/xfs/xfs_trans_extfree.c b/fs/xfs/xfs_trans_extfree.c
index 459ddec137a4..ab438647592a 100644
--- a/fs/xfs/xfs_trans_extfree.c
+++ b/fs/xfs/xfs_trans_extfree.c
@@ -79,7 +79,8 @@ xfs_trans_free_extent(
79 79
80 trace_xfs_bmap_free_deferred(tp->t_mountp, agno, 0, agbno, ext_len); 80 trace_xfs_bmap_free_deferred(tp->t_mountp, agno, 0, agbno, ext_len);
81 81
82 error = xfs_free_extent(tp, start_block, ext_len, oinfo); 82 error = xfs_free_extent(tp, start_block, ext_len, oinfo,
83 XFS_AG_RESV_NONE);
83 84
84 /* 85 /*
85 * Mark the transaction dirty, even on error. This ensures the 86 * Mark the transaction dirty, even on error. This ensures the