diff options
author | Dave Chinner <david@fromorbit.com> | 2016-10-02 18:52:31 -0400 |
---|---|---|
committer | Dave Chinner <david@fromorbit.com> | 2016-10-02 18:52:31 -0400 |
commit | 79ad57612495744d3875a6fba25c467a87b3ad64 (patch) | |
tree | 34532ea5817090e1b650da0c67135d169e7c5e16 /fs/xfs | |
parent | b036b97050a9799aaaee78a8501aa255a74a2db7 (diff) | |
parent | 3fd129b63fd062a0d8f5d55994a6e98896c20fa7 (diff) |
Merge branch 'xfs-4.9-reflink-prep' into for-next
Diffstat (limited to 'fs/xfs')
-rw-r--r-- | fs/xfs/Makefile | 1 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_ag_resv.c | 325 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_ag_resv.h | 35 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_alloc.c | 112 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_alloc.h | 8 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_bmap.c | 6 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_btree.c | 59 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_btree.h | 28 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_defer.c | 79 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_ialloc_btree.c | 2 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_log_format.h | 10 | ||||
-rw-r--r-- | fs/xfs/xfs_filestream.c | 4 | ||||
-rw-r--r-- | fs/xfs/xfs_fsops.c | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_mount.h | 36 | ||||
-rw-r--r-- | fs/xfs/xfs_rmap_item.c | 36 | ||||
-rw-r--r-- | fs/xfs/xfs_rmap_item.h | 8 | ||||
-rw-r--r-- | fs/xfs/xfs_super.c | 5 | ||||
-rw-r--r-- | fs/xfs/xfs_trace.h | 75 | ||||
-rw-r--r-- | fs/xfs/xfs_trans.c | 1 | ||||
-rw-r--r-- | fs/xfs/xfs_trans_extfree.c | 3 |
20 files changed, 719 insertions, 116 deletions
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index fc593c869493..584e87e11cb6 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile | |||
@@ -52,6 +52,7 @@ xfs-y += $(addprefix libxfs/, \ | |||
52 | xfs_inode_fork.o \ | 52 | xfs_inode_fork.o \ |
53 | xfs_inode_buf.o \ | 53 | xfs_inode_buf.o \ |
54 | xfs_log_rlimit.o \ | 54 | xfs_log_rlimit.o \ |
55 | xfs_ag_resv.o \ | ||
55 | xfs_rmap.o \ | 56 | xfs_rmap.o \ |
56 | xfs_rmap_btree.o \ | 57 | xfs_rmap_btree.o \ |
57 | xfs_sb.o \ | 58 | xfs_sb.o \ |
diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c new file mode 100644 index 000000000000..e3ae0f2b4294 --- /dev/null +++ b/fs/xfs/libxfs/xfs_ag_resv.c | |||
@@ -0,0 +1,325 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2016 Oracle. All Rights Reserved. | ||
3 | * | ||
4 | * Author: Darrick J. Wong <darrick.wong@oracle.com> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version 2 | ||
9 | * of the License, or (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it would be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write the Free Software Foundation, | ||
18 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. | ||
19 | */ | ||
20 | #include "xfs.h" | ||
21 | #include "xfs_fs.h" | ||
22 | #include "xfs_shared.h" | ||
23 | #include "xfs_format.h" | ||
24 | #include "xfs_log_format.h" | ||
25 | #include "xfs_trans_resv.h" | ||
26 | #include "xfs_sb.h" | ||
27 | #include "xfs_mount.h" | ||
28 | #include "xfs_defer.h" | ||
29 | #include "xfs_alloc.h" | ||
30 | #include "xfs_error.h" | ||
31 | #include "xfs_trace.h" | ||
32 | #include "xfs_cksum.h" | ||
33 | #include "xfs_trans.h" | ||
34 | #include "xfs_bit.h" | ||
35 | #include "xfs_bmap.h" | ||
36 | #include "xfs_bmap_btree.h" | ||
37 | #include "xfs_ag_resv.h" | ||
38 | #include "xfs_trans_space.h" | ||
39 | #include "xfs_rmap_btree.h" | ||
40 | #include "xfs_btree.h" | ||
41 | |||
42 | /* | ||
43 | * Per-AG Block Reservations | ||
44 | * | ||
45 | * For some kinds of allocation group metadata structures, it is advantageous | ||
46 | * to reserve a small number of blocks in each AG so that future expansions of | ||
47 | * that data structure do not encounter ENOSPC because errors during a btree | ||
48 | * split cause the filesystem to go offline. | ||
49 | * | ||
50 | * Prior to the introduction of reflink, this wasn't an issue because the free | ||
51 | * space btrees maintain a reserve of space (the AGFL) to handle any expansion | ||
52 | * that may be necessary; and allocations of other metadata (inodes, BMBT, | ||
53 | * dir/attr) aren't restricted to a single AG. However, with reflink it is | ||
54 | * possible to allocate all the space in an AG, have subsequent reflink/CoW | ||
55 | * activity expand the refcount btree, and discover that there's no space left | ||
56 | * to handle that expansion. Since we can calculate the maximum size of the | ||
57 | * refcount btree, we can reserve space for it and avoid ENOSPC. | ||
58 | * | ||
59 | * Handling per-AG reservations consists of four changes to the allocator's | ||
60 | * behavior: First, because these reservations are always needed, we decrease | ||
61 | * the ag_max_usable counter to reflect the size of the AG after the reserved | ||
62 | * blocks are taken. Second, the reservations must be reflected in the | ||
63 | * fdblocks count to maintain proper accounting. Third, each AG must maintain | ||
64 | * its own reserved block counter so that we can calculate the amount of space | ||
65 | * that must remain free to maintain the reservations. Fourth, the "remaining | ||
66 | * reserved blocks" count must be used when calculating the length of the | ||
67 | * longest free extent in an AG and to clamp maxlen in the per-AG allocation | ||
68 | * functions. In other words, we maintain a virtual allocation via in-core | ||
69 | * accounting tricks so that we don't have to clean up after a crash. :) | ||
70 | * | ||
71 | * Reserved blocks can be managed by passing one of the enum xfs_ag_resv_type | ||
72 | * values via struct xfs_alloc_arg or directly to the xfs_free_extent | ||
73 | * function. It might seem a little funny to maintain a reservoir of blocks | ||
74 | * to feed another reservoir, but the AGFL only holds enough blocks to get | ||
75 | * through the next transaction. The per-AG reservation is to ensure (we | ||
76 | * hope) that each AG never runs out of blocks. Each data structure wanting | ||
77 | * to use the reservation system should update ask/used in xfs_ag_resv_init. | ||
78 | */ | ||
79 | |||
80 | /* | ||
81 | * Are we critically low on blocks? For now we'll define that as the number | ||
82 | * of blocks we can get our hands on being less than 10% of what we reserved | ||
83 | * or less than some arbitrary number (maximum btree height). | ||
84 | */ | ||
85 | bool | ||
86 | xfs_ag_resv_critical( | ||
87 | struct xfs_perag *pag, | ||
88 | enum xfs_ag_resv_type type) | ||
89 | { | ||
90 | xfs_extlen_t avail; | ||
91 | xfs_extlen_t orig; | ||
92 | |||
93 | switch (type) { | ||
94 | case XFS_AG_RESV_METADATA: | ||
95 | avail = pag->pagf_freeblks - pag->pag_agfl_resv.ar_reserved; | ||
96 | orig = pag->pag_meta_resv.ar_asked; | ||
97 | break; | ||
98 | case XFS_AG_RESV_AGFL: | ||
99 | avail = pag->pagf_freeblks + pag->pagf_flcount - | ||
100 | pag->pag_meta_resv.ar_reserved; | ||
101 | orig = pag->pag_agfl_resv.ar_asked; | ||
102 | break; | ||
103 | default: | ||
104 | ASSERT(0); | ||
105 | return false; | ||
106 | } | ||
107 | |||
108 | trace_xfs_ag_resv_critical(pag, type, avail); | ||
109 | |||
110 | /* Critically low if less than 10% or max btree height remains. */ | ||
111 | return avail < orig / 10 || avail < XFS_BTREE_MAXLEVELS; | ||
112 | } | ||
113 | |||
114 | /* | ||
115 | * How many blocks are reserved but not used, and therefore must not be | ||
116 | * allocated away? | ||
117 | */ | ||
118 | xfs_extlen_t | ||
119 | xfs_ag_resv_needed( | ||
120 | struct xfs_perag *pag, | ||
121 | enum xfs_ag_resv_type type) | ||
122 | { | ||
123 | xfs_extlen_t len; | ||
124 | |||
125 | len = pag->pag_meta_resv.ar_reserved + pag->pag_agfl_resv.ar_reserved; | ||
126 | switch (type) { | ||
127 | case XFS_AG_RESV_METADATA: | ||
128 | case XFS_AG_RESV_AGFL: | ||
129 | len -= xfs_perag_resv(pag, type)->ar_reserved; | ||
130 | break; | ||
131 | case XFS_AG_RESV_NONE: | ||
132 | /* empty */ | ||
133 | break; | ||
134 | default: | ||
135 | ASSERT(0); | ||
136 | } | ||
137 | |||
138 | trace_xfs_ag_resv_needed(pag, type, len); | ||
139 | |||
140 | return len; | ||
141 | } | ||
142 | |||
143 | /* Clean out a reservation */ | ||
144 | static int | ||
145 | __xfs_ag_resv_free( | ||
146 | struct xfs_perag *pag, | ||
147 | enum xfs_ag_resv_type type) | ||
148 | { | ||
149 | struct xfs_ag_resv *resv; | ||
150 | xfs_extlen_t oldresv; | ||
151 | int error; | ||
152 | |||
153 | trace_xfs_ag_resv_free(pag, type, 0); | ||
154 | |||
155 | resv = xfs_perag_resv(pag, type); | ||
156 | pag->pag_mount->m_ag_max_usable += resv->ar_asked; | ||
157 | /* | ||
158 | * AGFL blocks are always considered "free", so whatever | ||
159 | * was reserved at mount time must be given back at umount. | ||
160 | */ | ||
161 | if (type == XFS_AG_RESV_AGFL) | ||
162 | oldresv = resv->ar_orig_reserved; | ||
163 | else | ||
164 | oldresv = resv->ar_reserved; | ||
165 | error = xfs_mod_fdblocks(pag->pag_mount, oldresv, true); | ||
166 | resv->ar_reserved = 0; | ||
167 | resv->ar_asked = 0; | ||
168 | |||
169 | if (error) | ||
170 | trace_xfs_ag_resv_free_error(pag->pag_mount, pag->pag_agno, | ||
171 | error, _RET_IP_); | ||
172 | return error; | ||
173 | } | ||
174 | |||
175 | /* Free a per-AG reservation. */ | ||
176 | int | ||
177 | xfs_ag_resv_free( | ||
178 | struct xfs_perag *pag) | ||
179 | { | ||
180 | int error; | ||
181 | int err2; | ||
182 | |||
183 | error = __xfs_ag_resv_free(pag, XFS_AG_RESV_AGFL); | ||
184 | err2 = __xfs_ag_resv_free(pag, XFS_AG_RESV_METADATA); | ||
185 | if (err2 && !error) | ||
186 | error = err2; | ||
187 | return error; | ||
188 | } | ||
189 | |||
190 | static int | ||
191 | __xfs_ag_resv_init( | ||
192 | struct xfs_perag *pag, | ||
193 | enum xfs_ag_resv_type type, | ||
194 | xfs_extlen_t ask, | ||
195 | xfs_extlen_t used) | ||
196 | { | ||
197 | struct xfs_mount *mp = pag->pag_mount; | ||
198 | struct xfs_ag_resv *resv; | ||
199 | int error; | ||
200 | |||
201 | resv = xfs_perag_resv(pag, type); | ||
202 | if (used > ask) | ||
203 | ask = used; | ||
204 | resv->ar_asked = ask; | ||
205 | resv->ar_reserved = resv->ar_orig_reserved = ask - used; | ||
206 | mp->m_ag_max_usable -= ask; | ||
207 | |||
208 | trace_xfs_ag_resv_init(pag, type, ask); | ||
209 | |||
210 | error = xfs_mod_fdblocks(mp, -(int64_t)resv->ar_reserved, true); | ||
211 | if (error) | ||
212 | trace_xfs_ag_resv_init_error(pag->pag_mount, pag->pag_agno, | ||
213 | error, _RET_IP_); | ||
214 | |||
215 | return error; | ||
216 | } | ||
217 | |||
218 | /* Create a per-AG block reservation. */ | ||
219 | int | ||
220 | xfs_ag_resv_init( | ||
221 | struct xfs_perag *pag) | ||
222 | { | ||
223 | xfs_extlen_t ask; | ||
224 | xfs_extlen_t used; | ||
225 | int error = 0; | ||
226 | |||
227 | /* Create the metadata reservation. */ | ||
228 | if (pag->pag_meta_resv.ar_asked == 0) { | ||
229 | ask = used = 0; | ||
230 | |||
231 | error = __xfs_ag_resv_init(pag, XFS_AG_RESV_METADATA, | ||
232 | ask, used); | ||
233 | if (error) | ||
234 | goto out; | ||
235 | } | ||
236 | |||
237 | /* Create the AGFL metadata reservation */ | ||
238 | if (pag->pag_agfl_resv.ar_asked == 0) { | ||
239 | ask = used = 0; | ||
240 | |||
241 | error = __xfs_ag_resv_init(pag, XFS_AG_RESV_AGFL, ask, used); | ||
242 | if (error) | ||
243 | goto out; | ||
244 | } | ||
245 | |||
246 | out: | ||
247 | return error; | ||
248 | } | ||
249 | |||
250 | /* Allocate a block from the reservation. */ | ||
251 | void | ||
252 | xfs_ag_resv_alloc_extent( | ||
253 | struct xfs_perag *pag, | ||
254 | enum xfs_ag_resv_type type, | ||
255 | struct xfs_alloc_arg *args) | ||
256 | { | ||
257 | struct xfs_ag_resv *resv; | ||
258 | xfs_extlen_t len; | ||
259 | uint field; | ||
260 | |||
261 | trace_xfs_ag_resv_alloc_extent(pag, type, args->len); | ||
262 | |||
263 | switch (type) { | ||
264 | case XFS_AG_RESV_METADATA: | ||
265 | case XFS_AG_RESV_AGFL: | ||
266 | resv = xfs_perag_resv(pag, type); | ||
267 | break; | ||
268 | default: | ||
269 | ASSERT(0); | ||
270 | /* fall through */ | ||
271 | case XFS_AG_RESV_NONE: | ||
272 | field = args->wasdel ? XFS_TRANS_SB_RES_FDBLOCKS : | ||
273 | XFS_TRANS_SB_FDBLOCKS; | ||
274 | xfs_trans_mod_sb(args->tp, field, -(int64_t)args->len); | ||
275 | return; | ||
276 | } | ||
277 | |||
278 | len = min_t(xfs_extlen_t, args->len, resv->ar_reserved); | ||
279 | resv->ar_reserved -= len; | ||
280 | if (type == XFS_AG_RESV_AGFL) | ||
281 | return; | ||
282 | /* Allocations of reserved blocks only need on-disk sb updates... */ | ||
283 | xfs_trans_mod_sb(args->tp, XFS_TRANS_SB_RES_FDBLOCKS, -(int64_t)len); | ||
284 | /* ...but non-reserved blocks need in-core and on-disk updates. */ | ||
285 | if (args->len > len) | ||
286 | xfs_trans_mod_sb(args->tp, XFS_TRANS_SB_FDBLOCKS, | ||
287 | -((int64_t)args->len - len)); | ||
288 | } | ||
289 | |||
290 | /* Free a block to the reservation. */ | ||
291 | void | ||
292 | xfs_ag_resv_free_extent( | ||
293 | struct xfs_perag *pag, | ||
294 | enum xfs_ag_resv_type type, | ||
295 | struct xfs_trans *tp, | ||
296 | xfs_extlen_t len) | ||
297 | { | ||
298 | xfs_extlen_t leftover; | ||
299 | struct xfs_ag_resv *resv; | ||
300 | |||
301 | trace_xfs_ag_resv_free_extent(pag, type, len); | ||
302 | |||
303 | switch (type) { | ||
304 | case XFS_AG_RESV_METADATA: | ||
305 | case XFS_AG_RESV_AGFL: | ||
306 | resv = xfs_perag_resv(pag, type); | ||
307 | break; | ||
308 | default: | ||
309 | ASSERT(0); | ||
310 | /* fall through */ | ||
311 | case XFS_AG_RESV_NONE: | ||
312 | xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (int64_t)len); | ||
313 | return; | ||
314 | } | ||
315 | |||
316 | leftover = min_t(xfs_extlen_t, len, resv->ar_asked - resv->ar_reserved); | ||
317 | resv->ar_reserved += leftover; | ||
318 | if (type == XFS_AG_RESV_AGFL) | ||
319 | return; | ||
320 | /* Freeing into the reserved pool only requires on-disk update... */ | ||
321 | xfs_trans_mod_sb(tp, XFS_TRANS_SB_RES_FDBLOCKS, len); | ||
322 | /* ...but freeing beyond that requires in-core and on-disk update. */ | ||
323 | if (len > leftover) | ||
324 | xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, len - leftover); | ||
325 | } | ||
diff --git a/fs/xfs/libxfs/xfs_ag_resv.h b/fs/xfs/libxfs/xfs_ag_resv.h new file mode 100644 index 000000000000..8d6c687deef3 --- /dev/null +++ b/fs/xfs/libxfs/xfs_ag_resv.h | |||
@@ -0,0 +1,35 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2016 Oracle. All Rights Reserved. | ||
3 | * | ||
4 | * Author: Darrick J. Wong <darrick.wong@oracle.com> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version 2 | ||
9 | * of the License, or (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it would be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write the Free Software Foundation, | ||
18 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. | ||
19 | */ | ||
20 | #ifndef __XFS_AG_RESV_H__ | ||
21 | #define __XFS_AG_RESV_H__ | ||
22 | |||
23 | int xfs_ag_resv_free(struct xfs_perag *pag); | ||
24 | int xfs_ag_resv_init(struct xfs_perag *pag); | ||
25 | |||
26 | bool xfs_ag_resv_critical(struct xfs_perag *pag, enum xfs_ag_resv_type type); | ||
27 | xfs_extlen_t xfs_ag_resv_needed(struct xfs_perag *pag, | ||
28 | enum xfs_ag_resv_type type); | ||
29 | |||
30 | void xfs_ag_resv_alloc_extent(struct xfs_perag *pag, enum xfs_ag_resv_type type, | ||
31 | struct xfs_alloc_arg *args); | ||
32 | void xfs_ag_resv_free_extent(struct xfs_perag *pag, enum xfs_ag_resv_type type, | ||
33 | struct xfs_trans *tp, xfs_extlen_t len); | ||
34 | |||
35 | #endif /* __XFS_AG_RESV_H__ */ | ||
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 05b5243d89f6..2620a86a756a 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c | |||
@@ -37,6 +37,7 @@ | |||
37 | #include "xfs_trans.h" | 37 | #include "xfs_trans.h" |
38 | #include "xfs_buf_item.h" | 38 | #include "xfs_buf_item.h" |
39 | #include "xfs_log.h" | 39 | #include "xfs_log.h" |
40 | #include "xfs_ag_resv.h" | ||
40 | 41 | ||
41 | struct workqueue_struct *xfs_alloc_wq; | 42 | struct workqueue_struct *xfs_alloc_wq; |
42 | 43 | ||
@@ -74,14 +75,8 @@ xfs_prealloc_blocks( | |||
74 | * extents need to be actually allocated. To get around this, we explicitly set | 75 | * extents need to be actually allocated. To get around this, we explicitly set |
75 | * aside a few blocks which will not be reserved in delayed allocation. | 76 | * aside a few blocks which will not be reserved in delayed allocation. |
76 | * | 77 | * |
77 | * When rmap is disabled, we need to reserve 4 fsbs _per AG_ for the freelist | 78 | * We need to reserve 4 fsbs _per AG_ for the freelist and 4 more to handle a |
78 | * and 4 more to handle a potential split of the file's bmap btree. | 79 | * potential split of the file's bmap btree. |
79 | * | ||
80 | * When rmap is enabled, we must also be able to handle two rmap btree inserts | ||
81 | * to record both the file data extent and a new bmbt block. The bmbt block | ||
82 | * might not be in the same AG as the file data extent. In the worst case | ||
83 | * the bmap btree splits multiple levels and all the new blocks come from | ||
84 | * different AGs, so set aside enough to handle rmap btree splits in all AGs. | ||
85 | */ | 80 | */ |
86 | unsigned int | 81 | unsigned int |
87 | xfs_alloc_set_aside( | 82 | xfs_alloc_set_aside( |
@@ -90,8 +85,6 @@ xfs_alloc_set_aside( | |||
90 | unsigned int blocks; | 85 | unsigned int blocks; |
91 | 86 | ||
92 | blocks = 4 + (mp->m_sb.sb_agcount * XFS_ALLOC_AGFL_RESERVE); | 87 | blocks = 4 + (mp->m_sb.sb_agcount * XFS_ALLOC_AGFL_RESERVE); |
93 | if (xfs_sb_version_hasrmapbt(&mp->m_sb)) | ||
94 | blocks += mp->m_sb.sb_agcount * mp->m_rmap_maxlevels; | ||
95 | return blocks; | 88 | return blocks; |
96 | } | 89 | } |
97 | 90 | ||
@@ -680,12 +673,29 @@ xfs_alloc_ag_vextent( | |||
680 | xfs_alloc_arg_t *args) /* argument structure for allocation */ | 673 | xfs_alloc_arg_t *args) /* argument structure for allocation */ |
681 | { | 674 | { |
682 | int error=0; | 675 | int error=0; |
676 | xfs_extlen_t reservation; | ||
677 | xfs_extlen_t oldmax; | ||
683 | 678 | ||
684 | ASSERT(args->minlen > 0); | 679 | ASSERT(args->minlen > 0); |
685 | ASSERT(args->maxlen > 0); | 680 | ASSERT(args->maxlen > 0); |
686 | ASSERT(args->minlen <= args->maxlen); | 681 | ASSERT(args->minlen <= args->maxlen); |
687 | ASSERT(args->mod < args->prod); | 682 | ASSERT(args->mod < args->prod); |
688 | ASSERT(args->alignment > 0); | 683 | ASSERT(args->alignment > 0); |
684 | |||
685 | /* | ||
686 | * Clamp maxlen to the amount of free space minus any reservations | ||
687 | * that have been made. | ||
688 | */ | ||
689 | oldmax = args->maxlen; | ||
690 | reservation = xfs_ag_resv_needed(args->pag, args->resv); | ||
691 | if (args->maxlen > args->pag->pagf_freeblks - reservation) | ||
692 | args->maxlen = args->pag->pagf_freeblks - reservation; | ||
693 | if (args->maxlen == 0) { | ||
694 | args->agbno = NULLAGBLOCK; | ||
695 | args->maxlen = oldmax; | ||
696 | return 0; | ||
697 | } | ||
698 | |||
689 | /* | 699 | /* |
690 | * Branch to correct routine based on the type. | 700 | * Branch to correct routine based on the type. |
691 | */ | 701 | */ |
@@ -705,12 +715,14 @@ xfs_alloc_ag_vextent( | |||
705 | /* NOTREACHED */ | 715 | /* NOTREACHED */ |
706 | } | 716 | } |
707 | 717 | ||
718 | args->maxlen = oldmax; | ||
719 | |||
708 | if (error || args->agbno == NULLAGBLOCK) | 720 | if (error || args->agbno == NULLAGBLOCK) |
709 | return error; | 721 | return error; |
710 | 722 | ||
711 | ASSERT(args->len >= args->minlen); | 723 | ASSERT(args->len >= args->minlen); |
712 | ASSERT(args->len <= args->maxlen); | 724 | ASSERT(args->len <= args->maxlen); |
713 | ASSERT(!args->wasfromfl || !args->isfl); | 725 | ASSERT(!args->wasfromfl || args->resv != XFS_AG_RESV_AGFL); |
714 | ASSERT(args->agbno % args->alignment == 0); | 726 | ASSERT(args->agbno % args->alignment == 0); |
715 | 727 | ||
716 | /* if not file data, insert new block into the reverse map btree */ | 728 | /* if not file data, insert new block into the reverse map btree */ |
@@ -732,12 +744,7 @@ xfs_alloc_ag_vextent( | |||
732 | args->agbno, args->len)); | 744 | args->agbno, args->len)); |
733 | } | 745 | } |
734 | 746 | ||
735 | if (!args->isfl) { | 747 | xfs_ag_resv_alloc_extent(args->pag, args->resv, args); |
736 | xfs_trans_mod_sb(args->tp, args->wasdel ? | ||
737 | XFS_TRANS_SB_RES_FDBLOCKS : | ||
738 | XFS_TRANS_SB_FDBLOCKS, | ||
739 | -((long)(args->len))); | ||
740 | } | ||
741 | 748 | ||
742 | XFS_STATS_INC(args->mp, xs_allocx); | 749 | XFS_STATS_INC(args->mp, xs_allocx); |
743 | XFS_STATS_ADD(args->mp, xs_allocb, args->len); | 750 | XFS_STATS_ADD(args->mp, xs_allocb, args->len); |
@@ -1583,6 +1590,7 @@ xfs_alloc_ag_vextent_small( | |||
1583 | int *stat) /* status: 0-freelist, 1-normal/none */ | 1590 | int *stat) /* status: 0-freelist, 1-normal/none */ |
1584 | { | 1591 | { |
1585 | struct xfs_owner_info oinfo; | 1592 | struct xfs_owner_info oinfo; |
1593 | struct xfs_perag *pag; | ||
1586 | int error; | 1594 | int error; |
1587 | xfs_agblock_t fbno; | 1595 | xfs_agblock_t fbno; |
1588 | xfs_extlen_t flen; | 1596 | xfs_extlen_t flen; |
@@ -1600,7 +1608,8 @@ xfs_alloc_ag_vextent_small( | |||
1600 | * to respect minleft even when pulling from the | 1608 | * to respect minleft even when pulling from the |
1601 | * freelist. | 1609 | * freelist. |
1602 | */ | 1610 | */ |
1603 | else if (args->minlen == 1 && args->alignment == 1 && !args->isfl && | 1611 | else if (args->minlen == 1 && args->alignment == 1 && |
1612 | args->resv != XFS_AG_RESV_AGFL && | ||
1604 | (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount) | 1613 | (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount) |
1605 | > args->minleft)) { | 1614 | > args->minleft)) { |
1606 | error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0); | 1615 | error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0); |
@@ -1629,13 +1638,18 @@ xfs_alloc_ag_vextent_small( | |||
1629 | /* | 1638 | /* |
1630 | * If we're feeding an AGFL block to something that | 1639 | * If we're feeding an AGFL block to something that |
1631 | * doesn't live in the free space, we need to clear | 1640 | * doesn't live in the free space, we need to clear |
1632 | * out the OWN_AG rmap. | 1641 | * out the OWN_AG rmap and add the block back to |
1642 | * the AGFL per-AG reservation. | ||
1633 | */ | 1643 | */ |
1634 | xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_AG); | 1644 | xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_AG); |
1635 | error = xfs_rmap_free(args->tp, args->agbp, args->agno, | 1645 | error = xfs_rmap_free(args->tp, args->agbp, args->agno, |
1636 | fbno, 1, &oinfo); | 1646 | fbno, 1, &oinfo); |
1637 | if (error) | 1647 | if (error) |
1638 | goto error0; | 1648 | goto error0; |
1649 | pag = xfs_perag_get(args->mp, args->agno); | ||
1650 | xfs_ag_resv_free_extent(pag, XFS_AG_RESV_AGFL, | ||
1651 | args->tp, 1); | ||
1652 | xfs_perag_put(pag); | ||
1639 | 1653 | ||
1640 | *stat = 0; | 1654 | *stat = 0; |
1641 | return 0; | 1655 | return 0; |
@@ -1683,7 +1697,7 @@ xfs_free_ag_extent( | |||
1683 | xfs_agblock_t bno, | 1697 | xfs_agblock_t bno, |
1684 | xfs_extlen_t len, | 1698 | xfs_extlen_t len, |
1685 | struct xfs_owner_info *oinfo, | 1699 | struct xfs_owner_info *oinfo, |
1686 | int isfl) | 1700 | enum xfs_ag_resv_type type) |
1687 | { | 1701 | { |
1688 | xfs_btree_cur_t *bno_cur; /* cursor for by-block btree */ | 1702 | xfs_btree_cur_t *bno_cur; /* cursor for by-block btree */ |
1689 | xfs_btree_cur_t *cnt_cur; /* cursor for by-size btree */ | 1703 | xfs_btree_cur_t *cnt_cur; /* cursor for by-size btree */ |
@@ -1911,21 +1925,22 @@ xfs_free_ag_extent( | |||
1911 | */ | 1925 | */ |
1912 | pag = xfs_perag_get(mp, agno); | 1926 | pag = xfs_perag_get(mp, agno); |
1913 | error = xfs_alloc_update_counters(tp, pag, agbp, len); | 1927 | error = xfs_alloc_update_counters(tp, pag, agbp, len); |
1928 | xfs_ag_resv_free_extent(pag, type, tp, len); | ||
1914 | xfs_perag_put(pag); | 1929 | xfs_perag_put(pag); |
1915 | if (error) | 1930 | if (error) |
1916 | goto error0; | 1931 | goto error0; |
1917 | 1932 | ||
1918 | if (!isfl) | ||
1919 | xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len); | ||
1920 | XFS_STATS_INC(mp, xs_freex); | 1933 | XFS_STATS_INC(mp, xs_freex); |
1921 | XFS_STATS_ADD(mp, xs_freeb, len); | 1934 | XFS_STATS_ADD(mp, xs_freeb, len); |
1922 | 1935 | ||
1923 | trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright); | 1936 | trace_xfs_free_extent(mp, agno, bno, len, type == XFS_AG_RESV_AGFL, |
1937 | haveleft, haveright); | ||
1924 | 1938 | ||
1925 | return 0; | 1939 | return 0; |
1926 | 1940 | ||
1927 | error0: | 1941 | error0: |
1928 | trace_xfs_free_extent(mp, agno, bno, len, isfl, -1, -1); | 1942 | trace_xfs_free_extent(mp, agno, bno, len, type == XFS_AG_RESV_AGFL, |
1943 | -1, -1); | ||
1929 | if (bno_cur) | 1944 | if (bno_cur) |
1930 | xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR); | 1945 | xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR); |
1931 | if (cnt_cur) | 1946 | if (cnt_cur) |
@@ -1950,21 +1965,43 @@ xfs_alloc_compute_maxlevels( | |||
1950 | } | 1965 | } |
1951 | 1966 | ||
1952 | /* | 1967 | /* |
1953 | * Find the length of the longest extent in an AG. | 1968 | * Find the length of the longest extent in an AG. The 'need' parameter |
1969 | * specifies how much space we're going to need for the AGFL and the | ||
1970 | * 'reserved' parameter tells us how many blocks in this AG are reserved for | ||
1971 | * other callers. | ||
1954 | */ | 1972 | */ |
1955 | xfs_extlen_t | 1973 | xfs_extlen_t |
1956 | xfs_alloc_longest_free_extent( | 1974 | xfs_alloc_longest_free_extent( |
1957 | struct xfs_mount *mp, | 1975 | struct xfs_mount *mp, |
1958 | struct xfs_perag *pag, | 1976 | struct xfs_perag *pag, |
1959 | xfs_extlen_t need) | 1977 | xfs_extlen_t need, |
1978 | xfs_extlen_t reserved) | ||
1960 | { | 1979 | { |
1961 | xfs_extlen_t delta = 0; | 1980 | xfs_extlen_t delta = 0; |
1962 | 1981 | ||
1982 | /* | ||
1983 | * If the AGFL needs a recharge, we'll have to subtract that from the | ||
1984 | * longest extent. | ||
1985 | */ | ||
1963 | if (need > pag->pagf_flcount) | 1986 | if (need > pag->pagf_flcount) |
1964 | delta = need - pag->pagf_flcount; | 1987 | delta = need - pag->pagf_flcount; |
1965 | 1988 | ||
1989 | /* | ||
1990 | * If we cannot maintain others' reservations with space from the | ||
1991 | * not-longest freesp extents, we'll have to subtract /that/ from | ||
1992 | * the longest extent too. | ||
1993 | */ | ||
1994 | if (pag->pagf_freeblks - pag->pagf_longest < reserved) | ||
1995 | delta += reserved - (pag->pagf_freeblks - pag->pagf_longest); | ||
1996 | |||
1997 | /* | ||
1998 | * If the longest extent is long enough to satisfy all the | ||
1999 | * reservations and AGFL rules in place, we can return this extent. | ||
2000 | */ | ||
1966 | if (pag->pagf_longest > delta) | 2001 | if (pag->pagf_longest > delta) |
1967 | return pag->pagf_longest - delta; | 2002 | return pag->pagf_longest - delta; |
2003 | |||
2004 | /* Otherwise, let the caller try for 1 block if there's space. */ | ||
1968 | return pag->pagf_flcount > 0 || pag->pagf_longest > 0; | 2005 | return pag->pagf_flcount > 0 || pag->pagf_longest > 0; |
1969 | } | 2006 | } |
1970 | 2007 | ||
@@ -2004,20 +2041,24 @@ xfs_alloc_space_available( | |||
2004 | { | 2041 | { |
2005 | struct xfs_perag *pag = args->pag; | 2042 | struct xfs_perag *pag = args->pag; |
2006 | xfs_extlen_t longest; | 2043 | xfs_extlen_t longest; |
2044 | xfs_extlen_t reservation; /* blocks that are still reserved */ | ||
2007 | int available; | 2045 | int available; |
2008 | 2046 | ||
2009 | if (flags & XFS_ALLOC_FLAG_FREEING) | 2047 | if (flags & XFS_ALLOC_FLAG_FREEING) |
2010 | return true; | 2048 | return true; |
2011 | 2049 | ||
2050 | reservation = xfs_ag_resv_needed(pag, args->resv); | ||
2051 | |||
2012 | /* do we have enough contiguous free space for the allocation? */ | 2052 | /* do we have enough contiguous free space for the allocation? */ |
2013 | longest = xfs_alloc_longest_free_extent(args->mp, pag, min_free); | 2053 | longest = xfs_alloc_longest_free_extent(args->mp, pag, min_free, |
2054 | reservation); | ||
2014 | if ((args->minlen + args->alignment + args->minalignslop - 1) > longest) | 2055 | if ((args->minlen + args->alignment + args->minalignslop - 1) > longest) |
2015 | return false; | 2056 | return false; |
2016 | 2057 | ||
2017 | /* do have enough free space remaining for the allocation? */ | 2058 | /* do we have enough free space remaining for the allocation? */ |
2018 | available = (int)(pag->pagf_freeblks + pag->pagf_flcount - | 2059 | available = (int)(pag->pagf_freeblks + pag->pagf_flcount - |
2019 | min_free - args->total); | 2060 | reservation - min_free - args->total); |
2020 | if (available < (int)args->minleft) | 2061 | if (available < (int)args->minleft || available <= 0) |
2021 | return false; | 2062 | return false; |
2022 | 2063 | ||
2023 | return true; | 2064 | return true; |
@@ -2124,7 +2165,7 @@ xfs_alloc_fix_freelist( | |||
2124 | if (error) | 2165 | if (error) |
2125 | goto out_agbp_relse; | 2166 | goto out_agbp_relse; |
2126 | error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1, | 2167 | error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1, |
2127 | &targs.oinfo, 1); | 2168 | &targs.oinfo, XFS_AG_RESV_AGFL); |
2128 | if (error) | 2169 | if (error) |
2129 | goto out_agbp_relse; | 2170 | goto out_agbp_relse; |
2130 | bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0); | 2171 | bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0); |
@@ -2135,7 +2176,7 @@ xfs_alloc_fix_freelist( | |||
2135 | targs.mp = mp; | 2176 | targs.mp = mp; |
2136 | targs.agbp = agbp; | 2177 | targs.agbp = agbp; |
2137 | targs.agno = args->agno; | 2178 | targs.agno = args->agno; |
2138 | targs.alignment = targs.minlen = targs.prod = targs.isfl = 1; | 2179 | targs.alignment = targs.minlen = targs.prod = 1; |
2139 | targs.type = XFS_ALLOCTYPE_THIS_AG; | 2180 | targs.type = XFS_ALLOCTYPE_THIS_AG; |
2140 | targs.pag = pag; | 2181 | targs.pag = pag; |
2141 | error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp); | 2182 | error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp); |
@@ -2146,6 +2187,7 @@ xfs_alloc_fix_freelist( | |||
2146 | while (pag->pagf_flcount < need) { | 2187 | while (pag->pagf_flcount < need) { |
2147 | targs.agbno = 0; | 2188 | targs.agbno = 0; |
2148 | targs.maxlen = need - pag->pagf_flcount; | 2189 | targs.maxlen = need - pag->pagf_flcount; |
2190 | targs.resv = XFS_AG_RESV_AGFL; | ||
2149 | 2191 | ||
2150 | /* Allocate as many blocks as possible at once. */ | 2192 | /* Allocate as many blocks as possible at once. */ |
2151 | error = xfs_alloc_ag_vextent(&targs); | 2193 | error = xfs_alloc_ag_vextent(&targs); |
@@ -2825,7 +2867,8 @@ xfs_free_extent( | |||
2825 | struct xfs_trans *tp, /* transaction pointer */ | 2867 | struct xfs_trans *tp, /* transaction pointer */ |
2826 | xfs_fsblock_t bno, /* starting block number of extent */ | 2868 | xfs_fsblock_t bno, /* starting block number of extent */ |
2827 | xfs_extlen_t len, /* length of extent */ | 2869 | xfs_extlen_t len, /* length of extent */ |
2828 | struct xfs_owner_info *oinfo) /* extent owner */ | 2870 | struct xfs_owner_info *oinfo, /* extent owner */ |
2871 | enum xfs_ag_resv_type type) /* block reservation type */ | ||
2829 | { | 2872 | { |
2830 | struct xfs_mount *mp = tp->t_mountp; | 2873 | struct xfs_mount *mp = tp->t_mountp; |
2831 | struct xfs_buf *agbp; | 2874 | struct xfs_buf *agbp; |
@@ -2834,6 +2877,7 @@ xfs_free_extent( | |||
2834 | int error; | 2877 | int error; |
2835 | 2878 | ||
2836 | ASSERT(len != 0); | 2879 | ASSERT(len != 0); |
2880 | ASSERT(type != XFS_AG_RESV_AGFL); | ||
2837 | 2881 | ||
2838 | if (XFS_TEST_ERROR(false, mp, | 2882 | if (XFS_TEST_ERROR(false, mp, |
2839 | XFS_ERRTAG_FREE_EXTENT, | 2883 | XFS_ERRTAG_FREE_EXTENT, |
@@ -2851,7 +2895,7 @@ xfs_free_extent( | |||
2851 | agbno + len <= be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_length), | 2895 | agbno + len <= be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_length), |
2852 | err); | 2896 | err); |
2853 | 2897 | ||
2854 | error = xfs_free_ag_extent(tp, agbp, agno, agbno, len, oinfo, 0); | 2898 | error = xfs_free_ag_extent(tp, agbp, agno, agbno, len, oinfo, type); |
2855 | if (error) | 2899 | if (error) |
2856 | goto err; | 2900 | goto err; |
2857 | 2901 | ||
diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h index 6fe2d6b7cfe9..f7c520193239 100644 --- a/fs/xfs/libxfs/xfs_alloc.h +++ b/fs/xfs/libxfs/xfs_alloc.h | |||
@@ -87,10 +87,10 @@ typedef struct xfs_alloc_arg { | |||
87 | xfs_alloctype_t otype; /* original allocation type */ | 87 | xfs_alloctype_t otype; /* original allocation type */ |
88 | char wasdel; /* set if allocation was prev delayed */ | 88 | char wasdel; /* set if allocation was prev delayed */ |
89 | char wasfromfl; /* set if allocation is from freelist */ | 89 | char wasfromfl; /* set if allocation is from freelist */ |
90 | char isfl; /* set if is freelist blocks - !acctg */ | ||
91 | char userdata; /* mask defining userdata treatment */ | 90 | char userdata; /* mask defining userdata treatment */ |
92 | xfs_fsblock_t firstblock; /* io first block allocated */ | 91 | xfs_fsblock_t firstblock; /* io first block allocated */ |
93 | struct xfs_owner_info oinfo; /* owner of blocks being allocated */ | 92 | struct xfs_owner_info oinfo; /* owner of blocks being allocated */ |
93 | enum xfs_ag_resv_type resv; /* block reservation to use */ | ||
94 | } xfs_alloc_arg_t; | 94 | } xfs_alloc_arg_t; |
95 | 95 | ||
96 | /* | 96 | /* |
@@ -106,7 +106,8 @@ unsigned int xfs_alloc_set_aside(struct xfs_mount *mp); | |||
106 | unsigned int xfs_alloc_ag_max_usable(struct xfs_mount *mp); | 106 | unsigned int xfs_alloc_ag_max_usable(struct xfs_mount *mp); |
107 | 107 | ||
108 | xfs_extlen_t xfs_alloc_longest_free_extent(struct xfs_mount *mp, | 108 | xfs_extlen_t xfs_alloc_longest_free_extent(struct xfs_mount *mp, |
109 | struct xfs_perag *pag, xfs_extlen_t need); | 109 | struct xfs_perag *pag, xfs_extlen_t need, |
110 | xfs_extlen_t reserved); | ||
110 | unsigned int xfs_alloc_min_freelist(struct xfs_mount *mp, | 111 | unsigned int xfs_alloc_min_freelist(struct xfs_mount *mp, |
111 | struct xfs_perag *pag); | 112 | struct xfs_perag *pag); |
112 | 113 | ||
@@ -184,7 +185,8 @@ xfs_free_extent( | |||
184 | struct xfs_trans *tp, /* transaction pointer */ | 185 | struct xfs_trans *tp, /* transaction pointer */ |
185 | xfs_fsblock_t bno, /* starting block number of extent */ | 186 | xfs_fsblock_t bno, /* starting block number of extent */ |
186 | xfs_extlen_t len, /* length of extent */ | 187 | xfs_extlen_t len, /* length of extent */ |
187 | struct xfs_owner_info *oinfo);/* extent owner */ | 188 | struct xfs_owner_info *oinfo, /* extent owner */ |
189 | enum xfs_ag_resv_type type); /* block reservation type */ | ||
188 | 190 | ||
189 | int /* error */ | 191 | int /* error */ |
190 | xfs_alloc_lookup_ge( | 192 | xfs_alloc_lookup_ge( |
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index b060bca93402..042d7bf9fb60 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c | |||
@@ -47,6 +47,7 @@ | |||
47 | #include "xfs_attr_leaf.h" | 47 | #include "xfs_attr_leaf.h" |
48 | #include "xfs_filestream.h" | 48 | #include "xfs_filestream.h" |
49 | #include "xfs_rmap.h" | 49 | #include "xfs_rmap.h" |
50 | #include "xfs_ag_resv.h" | ||
50 | 51 | ||
51 | 52 | ||
52 | kmem_zone_t *xfs_bmap_free_item_zone; | 53 | kmem_zone_t *xfs_bmap_free_item_zone; |
@@ -3501,7 +3502,8 @@ xfs_bmap_longest_free_extent( | |||
3501 | } | 3502 | } |
3502 | 3503 | ||
3503 | longest = xfs_alloc_longest_free_extent(mp, pag, | 3504 | longest = xfs_alloc_longest_free_extent(mp, pag, |
3504 | xfs_alloc_min_freelist(mp, pag)); | 3505 | xfs_alloc_min_freelist(mp, pag), |
3506 | xfs_ag_resv_needed(pag, XFS_AG_RESV_NONE)); | ||
3505 | if (*blen < longest) | 3507 | if (*blen < longest) |
3506 | *blen = longest; | 3508 | *blen = longest; |
3507 | 3509 | ||
@@ -3781,7 +3783,7 @@ xfs_bmap_btalloc( | |||
3781 | } | 3783 | } |
3782 | args.minleft = ap->minleft; | 3784 | args.minleft = ap->minleft; |
3783 | args.wasdel = ap->wasdel; | 3785 | args.wasdel = ap->wasdel; |
3784 | args.isfl = 0; | 3786 | args.resv = XFS_AG_RESV_NONE; |
3785 | args.userdata = ap->userdata; | 3787 | args.userdata = ap->userdata; |
3786 | if (ap->userdata & XFS_ALLOC_USERDATA_ZERO) | 3788 | if (ap->userdata & XFS_ALLOC_USERDATA_ZERO) |
3787 | args.ip = ap->ip; | 3789 | args.ip = ap->ip; |
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index 08569792fe20..aa1752f918b8 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c | |||
@@ -2070,7 +2070,7 @@ __xfs_btree_updkeys( | |||
2070 | struct xfs_buf *bp0, | 2070 | struct xfs_buf *bp0, |
2071 | bool force_all) | 2071 | bool force_all) |
2072 | { | 2072 | { |
2073 | union xfs_btree_bigkey key; /* keys from current level */ | 2073 | union xfs_btree_key key; /* keys from current level */ |
2074 | union xfs_btree_key *lkey; /* keys from the next level up */ | 2074 | union xfs_btree_key *lkey; /* keys from the next level up */ |
2075 | union xfs_btree_key *hkey; | 2075 | union xfs_btree_key *hkey; |
2076 | union xfs_btree_key *nlkey; /* keys from the next level up */ | 2076 | union xfs_btree_key *nlkey; /* keys from the next level up */ |
@@ -2086,7 +2086,7 @@ __xfs_btree_updkeys( | |||
2086 | 2086 | ||
2087 | trace_xfs_btree_updkeys(cur, level, bp0); | 2087 | trace_xfs_btree_updkeys(cur, level, bp0); |
2088 | 2088 | ||
2089 | lkey = (union xfs_btree_key *)&key; | 2089 | lkey = &key; |
2090 | hkey = xfs_btree_high_key_from_key(cur, lkey); | 2090 | hkey = xfs_btree_high_key_from_key(cur, lkey); |
2091 | xfs_btree_get_keys(cur, block, lkey); | 2091 | xfs_btree_get_keys(cur, block, lkey); |
2092 | for (level++; level < cur->bc_nlevels; level++) { | 2092 | for (level++; level < cur->bc_nlevels; level++) { |
@@ -3226,7 +3226,7 @@ xfs_btree_insrec( | |||
3226 | struct xfs_buf *bp; /* buffer for block */ | 3226 | struct xfs_buf *bp; /* buffer for block */ |
3227 | union xfs_btree_ptr nptr; /* new block ptr */ | 3227 | union xfs_btree_ptr nptr; /* new block ptr */ |
3228 | struct xfs_btree_cur *ncur; /* new btree cursor */ | 3228 | struct xfs_btree_cur *ncur; /* new btree cursor */ |
3229 | union xfs_btree_bigkey nkey; /* new block key */ | 3229 | union xfs_btree_key nkey; /* new block key */ |
3230 | union xfs_btree_key *lkey; | 3230 | union xfs_btree_key *lkey; |
3231 | int optr; /* old key/record index */ | 3231 | int optr; /* old key/record index */ |
3232 | int ptr; /* key/record index */ | 3232 | int ptr; /* key/record index */ |
@@ -3241,7 +3241,7 @@ xfs_btree_insrec( | |||
3241 | XFS_BTREE_TRACE_ARGIPR(cur, level, *ptrp, &rec); | 3241 | XFS_BTREE_TRACE_ARGIPR(cur, level, *ptrp, &rec); |
3242 | 3242 | ||
3243 | ncur = NULL; | 3243 | ncur = NULL; |
3244 | lkey = (union xfs_btree_key *)&nkey; | 3244 | lkey = &nkey; |
3245 | 3245 | ||
3246 | /* | 3246 | /* |
3247 | * If we have an external root pointer, and we've made it to the | 3247 | * If we have an external root pointer, and we've made it to the |
@@ -3444,14 +3444,14 @@ xfs_btree_insert( | |||
3444 | union xfs_btree_ptr nptr; /* new block number (split result) */ | 3444 | union xfs_btree_ptr nptr; /* new block number (split result) */ |
3445 | struct xfs_btree_cur *ncur; /* new cursor (split result) */ | 3445 | struct xfs_btree_cur *ncur; /* new cursor (split result) */ |
3446 | struct xfs_btree_cur *pcur; /* previous level's cursor */ | 3446 | struct xfs_btree_cur *pcur; /* previous level's cursor */ |
3447 | union xfs_btree_bigkey bkey; /* key of block to insert */ | 3447 | union xfs_btree_key bkey; /* key of block to insert */ |
3448 | union xfs_btree_key *key; | 3448 | union xfs_btree_key *key; |
3449 | union xfs_btree_rec rec; /* record to insert */ | 3449 | union xfs_btree_rec rec; /* record to insert */ |
3450 | 3450 | ||
3451 | level = 0; | 3451 | level = 0; |
3452 | ncur = NULL; | 3452 | ncur = NULL; |
3453 | pcur = cur; | 3453 | pcur = cur; |
3454 | key = (union xfs_btree_key *)&bkey; | 3454 | key = &bkey; |
3455 | 3455 | ||
3456 | xfs_btree_set_ptr_null(cur, &nptr); | 3456 | xfs_btree_set_ptr_null(cur, &nptr); |
3457 | 3457 | ||
@@ -4797,3 +4797,50 @@ xfs_btree_query_range( | |||
4797 | return xfs_btree_overlapped_query_range(cur, &low_key, &high_key, | 4797 | return xfs_btree_overlapped_query_range(cur, &low_key, &high_key, |
4798 | fn, priv); | 4798 | fn, priv); |
4799 | } | 4799 | } |
4800 | |||
4801 | /* | ||
4802 | * Calculate the number of blocks needed to store a given number of records | ||
4803 | * in a short-format (per-AG metadata) btree. | ||
4804 | */ | ||
4805 | xfs_extlen_t | ||
4806 | xfs_btree_calc_size( | ||
4807 | struct xfs_mount *mp, | ||
4808 | uint *limits, | ||
4809 | unsigned long long len) | ||
4810 | { | ||
4811 | int level; | ||
4812 | int maxrecs; | ||
4813 | xfs_extlen_t rval; | ||
4814 | |||
4815 | maxrecs = limits[0]; | ||
4816 | for (level = 0, rval = 0; len > 1; level++) { | ||
4817 | len += maxrecs - 1; | ||
4818 | do_div(len, maxrecs); | ||
4819 | maxrecs = limits[1]; | ||
4820 | rval += len; | ||
4821 | } | ||
4822 | return rval; | ||
4823 | } | ||
4824 | |||
4825 | int | ||
4826 | xfs_btree_count_blocks_helper( | ||
4827 | struct xfs_btree_cur *cur, | ||
4828 | int level, | ||
4829 | void *data) | ||
4830 | { | ||
4831 | xfs_extlen_t *blocks = data; | ||
4832 | (*blocks)++; | ||
4833 | |||
4834 | return 0; | ||
4835 | } | ||
4836 | |||
4837 | /* Count the blocks in a btree and return the result in *blocks. */ | ||
4838 | int | ||
4839 | xfs_btree_count_blocks( | ||
4840 | struct xfs_btree_cur *cur, | ||
4841 | xfs_extlen_t *blocks) | ||
4842 | { | ||
4843 | *blocks = 0; | ||
4844 | return xfs_btree_visit_blocks(cur, xfs_btree_count_blocks_helper, | ||
4845 | blocks); | ||
4846 | } | ||
diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h index 04d0865e5e6d..3f8556a5c2ad 100644 --- a/fs/xfs/libxfs/xfs_btree.h +++ b/fs/xfs/libxfs/xfs_btree.h | |||
@@ -37,30 +37,18 @@ union xfs_btree_ptr { | |||
37 | __be64 l; /* long form ptr */ | 37 | __be64 l; /* long form ptr */ |
38 | }; | 38 | }; |
39 | 39 | ||
40 | union xfs_btree_key { | ||
41 | struct xfs_bmbt_key bmbt; | ||
42 | xfs_bmdr_key_t bmbr; /* bmbt root block */ | ||
43 | xfs_alloc_key_t alloc; | ||
44 | struct xfs_inobt_key inobt; | ||
45 | struct xfs_rmap_key rmap; | ||
46 | }; | ||
47 | |||
48 | /* | 40 | /* |
49 | * In-core key that holds both low and high keys for overlapped btrees. | 41 | * The in-core btree key. Overlapping btrees actually store two keys |
50 | * The two keys are packed next to each other on disk, so do the same | 42 | * per pointer, so we reserve enough memory to hold both. The __*bigkey |
51 | * in memory. Preserve the existing xfs_btree_key as a single key to | 43 | * items should never be accessed directly. |
52 | * avoid the mental model breakage that would happen if we passed a | ||
53 | * bigkey into a function that operates on a single key. | ||
54 | */ | 44 | */ |
55 | union xfs_btree_bigkey { | 45 | union xfs_btree_key { |
56 | struct xfs_bmbt_key bmbt; | 46 | struct xfs_bmbt_key bmbt; |
57 | xfs_bmdr_key_t bmbr; /* bmbt root block */ | 47 | xfs_bmdr_key_t bmbr; /* bmbt root block */ |
58 | xfs_alloc_key_t alloc; | 48 | xfs_alloc_key_t alloc; |
59 | struct xfs_inobt_key inobt; | 49 | struct xfs_inobt_key inobt; |
60 | struct { | 50 | struct xfs_rmap_key rmap; |
61 | struct xfs_rmap_key rmap; | 51 | struct xfs_rmap_key __rmap_bigkey[2]; |
62 | struct xfs_rmap_key rmap_hi; | ||
63 | }; | ||
64 | }; | 52 | }; |
65 | 53 | ||
66 | union xfs_btree_rec { | 54 | union xfs_btree_rec { |
@@ -513,6 +501,8 @@ bool xfs_btree_sblock_v5hdr_verify(struct xfs_buf *bp); | |||
513 | bool xfs_btree_sblock_verify(struct xfs_buf *bp, unsigned int max_recs); | 501 | bool xfs_btree_sblock_verify(struct xfs_buf *bp, unsigned int max_recs); |
514 | uint xfs_btree_compute_maxlevels(struct xfs_mount *mp, uint *limits, | 502 | uint xfs_btree_compute_maxlevels(struct xfs_mount *mp, uint *limits, |
515 | unsigned long len); | 503 | unsigned long len); |
504 | xfs_extlen_t xfs_btree_calc_size(struct xfs_mount *mp, uint *limits, | ||
505 | unsigned long long len); | ||
516 | 506 | ||
517 | /* return codes */ | 507 | /* return codes */ |
518 | #define XFS_BTREE_QUERY_RANGE_CONTINUE 0 /* keep iterating */ | 508 | #define XFS_BTREE_QUERY_RANGE_CONTINUE 0 /* keep iterating */ |
@@ -529,4 +519,6 @@ typedef int (*xfs_btree_visit_blocks_fn)(struct xfs_btree_cur *cur, int level, | |||
529 | int xfs_btree_visit_blocks(struct xfs_btree_cur *cur, | 519 | int xfs_btree_visit_blocks(struct xfs_btree_cur *cur, |
530 | xfs_btree_visit_blocks_fn fn, void *data); | 520 | xfs_btree_visit_blocks_fn fn, void *data); |
531 | 521 | ||
522 | int xfs_btree_count_blocks(struct xfs_btree_cur *cur, xfs_extlen_t *blocks); | ||
523 | |||
532 | #endif /* __XFS_BTREE_H__ */ | 524 | #endif /* __XFS_BTREE_H__ */ |
diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c index c221d0ecd52e..613c5cf19436 100644 --- a/fs/xfs/libxfs/xfs_defer.c +++ b/fs/xfs/libxfs/xfs_defer.c | |||
@@ -81,6 +81,10 @@ | |||
81 | * - For each work item attached to the log intent item, | 81 | * - For each work item attached to the log intent item, |
82 | * * Perform the described action. | 82 | * * Perform the described action. |
83 | * * Attach the work item to the log done item. | 83 | * * Attach the work item to the log done item. |
84 | * * If the result of doing the work was -EAGAIN, ->finish_item | ||
85 | * wants a new transaction. See the "Requesting a Fresh | ||
86 | * Transaction while Finishing Deferred Work" section below for | ||
87 | * details. | ||
84 | * | 88 | * |
85 | * The key here is that we must log an intent item for all pending | 89 | * The key here is that we must log an intent item for all pending |
86 | * work items every time we roll the transaction, and that we must log | 90 | * work items every time we roll the transaction, and that we must log |
@@ -88,6 +92,34 @@ | |||
88 | * we can perform complex remapping operations, chaining intent items | 92 | * we can perform complex remapping operations, chaining intent items |
89 | * as needed. | 93 | * as needed. |
90 | * | 94 | * |
95 | * Requesting a Fresh Transaction while Finishing Deferred Work | ||
96 | * | ||
97 | * If ->finish_item decides that it needs a fresh transaction to | ||
98 | * finish the work, it must ask its caller (xfs_defer_finish) for a | ||
99 | * continuation. The most likely cause of this circumstance is the | ||
100 | * refcount adjust functions deciding that they've logged enough items | ||
101 | * to be at risk of exceeding the transaction reservation. | ||
102 | * | ||
103 | * To get a fresh transaction, we want to log the existing log done | ||
104 | * item to prevent the log intent item from replaying, immediately log | ||
105 | * a new log intent item with the unfinished work items, roll the | ||
106 | * transaction, and re-call ->finish_item wherever it left off. The | ||
107 | * log done item and the new log intent item must be in the same | ||
108 | * transaction or atomicity cannot be guaranteed; defer_finish ensures | ||
109 | * that this happens. | ||
110 | * | ||
111 | * This requires some coordination between ->finish_item and | ||
112 | * defer_finish. Upon deciding to request a new transaction, | ||
113 | * ->finish_item should update the current work item to reflect the | ||
114 | * unfinished work. Next, it should reset the log done item's list | ||
115 | * count to the number of items finished, and return -EAGAIN. | ||
116 | * defer_finish sees the -EAGAIN, logs the new log intent item | ||
117 | * with the remaining work items, and leaves the xfs_defer_pending | ||
118 | * item at the head of the dop_work queue. Then it rolls the | ||
119 | * transaction and picks up processing where it left off. It is | ||
120 | * required that ->finish_item must be careful to leave enough | ||
121 | * transaction reservation to fit the new log intent item. | ||
122 | * | ||
91 | * This is an example of remapping the extent (E, E+B) into file X at | 123 | * This is an example of remapping the extent (E, E+B) into file X at |
92 | * offset A and dealing with the extent (C, C+B) already being mapped | 124 | * offset A and dealing with the extent (C, C+B) already being mapped |
93 | * there: | 125 | * there: |
@@ -104,21 +136,26 @@ | |||
104 | * | Intent to add rmap (X, E, A, B) | | 136 | * | Intent to add rmap (X, E, A, B) | |
105 | * +-------------------------------------------------+ | 137 | * +-------------------------------------------------+ |
106 | * | Reduce refcount for extent (C, B) | t2 | 138 | * | Reduce refcount for extent (C, B) | t2 |
107 | * | Done reducing refcount for extent (C, B) | | 139 | * | Done reducing refcount for extent (C, 9) | |
140 | * | Intent to reduce refcount for extent (C+9, B-9) | | ||
141 | * | (ran out of space after 9 refcount updates) | | ||
142 | * +-------------------------------------------------+ | ||
143 | * | Reduce refcount for extent (C+9, B-9) | t3 | ||
144 | * | Done reducing refcount for extent (C+9, B-9) | | ||
108 | * | Increase refcount for extent (E, B) | | 145 | * | Increase refcount for extent (E, B) | |
109 | * | Done increasing refcount for extent (E, B) | | 146 | * | Done increasing refcount for extent (E, B) | |
110 | * | Intent to free extent (C, B) | | 147 | * | Intent to free extent (C, B) | |
111 | * | Intent to free extent (F, 1) (refcountbt block) | | 148 | * | Intent to free extent (F, 1) (refcountbt block) | |
112 | * | Intent to remove rmap (F, 1, REFC) | | 149 | * | Intent to remove rmap (F, 1, REFC) | |
113 | * +-------------------------------------------------+ | 150 | * +-------------------------------------------------+ |
114 | * | Remove rmap (X, C, A, B) | t3 | 151 | * | Remove rmap (X, C, A, B) | t4 |
115 | * | Done removing rmap (X, C, A, B) | | 152 | * | Done removing rmap (X, C, A, B) | |
116 | * | Add rmap (X, E, A, B) | | 153 | * | Add rmap (X, E, A, B) | |
117 | * | Done adding rmap (X, E, A, B) | | 154 | * | Done adding rmap (X, E, A, B) | |
118 | * | Remove rmap (F, 1, REFC) | | 155 | * | Remove rmap (F, 1, REFC) | |
119 | * | Done removing rmap (F, 1, REFC) | | 156 | * | Done removing rmap (F, 1, REFC) | |
120 | * +-------------------------------------------------+ | 157 | * +-------------------------------------------------+ |
121 | * | Free extent (C, B) | t4 | 158 | * | Free extent (C, B) | t5 |
122 | * | Done freeing extent (C, B) | | 159 | * | Done freeing extent (C, B) | |
123 | * | Free extent (D, 1) | | 160 | * | Free extent (D, 1) | |
124 | * | Done freeing extent (D, 1) | | 161 | * | Done freeing extent (D, 1) | |
@@ -141,6 +178,9 @@ | |||
141 | * - Intent to free extent (C, B) | 178 | * - Intent to free extent (C, B) |
142 | * - Intent to free extent (F, 1) (refcountbt block) | 179 | * - Intent to free extent (F, 1) (refcountbt block) |
143 | * - Intent to remove rmap (F, 1, REFC) | 180 | * - Intent to remove rmap (F, 1, REFC) |
181 | * | ||
182 | * Note that the continuation requested between t2 and t3 is likely to | ||
183 | * reoccur. | ||
144 | */ | 184 | */ |
145 | 185 | ||
146 | static const struct xfs_defer_op_type *defer_op_types[XFS_DEFER_OPS_TYPE_MAX]; | 186 | static const struct xfs_defer_op_type *defer_op_types[XFS_DEFER_OPS_TYPE_MAX]; |
@@ -323,7 +363,16 @@ xfs_defer_finish( | |||
323 | dfp->dfp_count--; | 363 | dfp->dfp_count--; |
324 | error = dfp->dfp_type->finish_item(*tp, dop, li, | 364 | error = dfp->dfp_type->finish_item(*tp, dop, li, |
325 | dfp->dfp_done, &state); | 365 | dfp->dfp_done, &state); |
326 | if (error) { | 366 | if (error == -EAGAIN) { |
367 | /* | ||
368 | * Caller wants a fresh transaction; | ||
369 | * put the work item back on the list | ||
370 | * and jump out. | ||
371 | */ | ||
372 | list_add(li, &dfp->dfp_work); | ||
373 | dfp->dfp_count++; | ||
374 | break; | ||
375 | } else if (error) { | ||
327 | /* | 376 | /* |
328 | * Clean up after ourselves and jump out. | 377 | * Clean up after ourselves and jump out. |
329 | * xfs_defer_cancel will take care of freeing | 378 | * xfs_defer_cancel will take care of freeing |
@@ -335,9 +384,25 @@ xfs_defer_finish( | |||
335 | goto out; | 384 | goto out; |
336 | } | 385 | } |
337 | } | 386 | } |
338 | /* Done with the dfp, free it. */ | 387 | if (error == -EAGAIN) { |
339 | list_del(&dfp->dfp_list); | 388 | /* |
340 | kmem_free(dfp); | 389 | * Caller wants a fresh transaction, so log a |
390 | * new log intent item to replace the old one | ||
391 | * and roll the transaction. See "Requesting | ||
392 | * a Fresh Transaction while Finishing | ||
393 | * Deferred Work" above. | ||
394 | */ | ||
395 | dfp->dfp_intent = dfp->dfp_type->create_intent(*tp, | ||
396 | dfp->dfp_count); | ||
397 | dfp->dfp_done = NULL; | ||
398 | list_for_each(li, &dfp->dfp_work) | ||
399 | dfp->dfp_type->log_item(*tp, dfp->dfp_intent, | ||
400 | li); | ||
401 | } else { | ||
402 | /* Done with the dfp, free it. */ | ||
403 | list_del(&dfp->dfp_list); | ||
404 | kmem_free(dfp); | ||
405 | } | ||
341 | 406 | ||
342 | if (cleanup_fn) | 407 | if (cleanup_fn) |
343 | cleanup_fn(*tp, state, error); | 408 | cleanup_fn(*tp, state, error); |
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c index 31ca2208c03d..eab68ae2e011 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.c +++ b/fs/xfs/libxfs/xfs_ialloc_btree.c | |||
@@ -132,7 +132,7 @@ xfs_inobt_free_block( | |||
132 | xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INOBT); | 132 | xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INOBT); |
133 | return xfs_free_extent(cur->bc_tp, | 133 | return xfs_free_extent(cur->bc_tp, |
134 | XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(bp)), 1, | 134 | XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(bp)), 1, |
135 | &oinfo); | 135 | &oinfo, XFS_AG_RESV_NONE); |
136 | } | 136 | } |
137 | 137 | ||
138 | STATIC int | 138 | STATIC int |
diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h index a6eed43fa7cd..fc5eef85d61e 100644 --- a/fs/xfs/libxfs/xfs_log_format.h +++ b/fs/xfs/libxfs/xfs_log_format.h | |||
@@ -647,9 +647,17 @@ struct xfs_rui_log_format { | |||
647 | __uint16_t rui_size; /* size of this item */ | 647 | __uint16_t rui_size; /* size of this item */ |
648 | __uint32_t rui_nextents; /* # extents to free */ | 648 | __uint32_t rui_nextents; /* # extents to free */ |
649 | __uint64_t rui_id; /* rui identifier */ | 649 | __uint64_t rui_id; /* rui identifier */ |
650 | struct xfs_map_extent rui_extents[1]; /* array of extents to rmap */ | 650 | struct xfs_map_extent rui_extents[]; /* array of extents to rmap */ |
651 | }; | 651 | }; |
652 | 652 | ||
653 | static inline size_t | ||
654 | xfs_rui_log_format_sizeof( | ||
655 | unsigned int nr) | ||
656 | { | ||
657 | return sizeof(struct xfs_rui_log_format) + | ||
658 | nr * sizeof(struct xfs_map_extent); | ||
659 | } | ||
660 | |||
653 | /* | 661 | /* |
654 | * This is the structure used to lay out an rud log item in the | 662 | * This is the structure used to lay out an rud log item in the |
655 | * log. The rud_extents array is a variable size array whose | 663 | * log. The rud_extents array is a variable size array whose |
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c index 4a33a3304369..c8005fdaaa8a 100644 --- a/fs/xfs/xfs_filestream.c +++ b/fs/xfs/xfs_filestream.c | |||
@@ -30,6 +30,7 @@ | |||
30 | #include "xfs_mru_cache.h" | 30 | #include "xfs_mru_cache.h" |
31 | #include "xfs_filestream.h" | 31 | #include "xfs_filestream.h" |
32 | #include "xfs_trace.h" | 32 | #include "xfs_trace.h" |
33 | #include "xfs_ag_resv.h" | ||
33 | 34 | ||
34 | struct xfs_fstrm_item { | 35 | struct xfs_fstrm_item { |
35 | struct xfs_mru_cache_elem mru; | 36 | struct xfs_mru_cache_elem mru; |
@@ -198,7 +199,8 @@ xfs_filestream_pick_ag( | |||
198 | } | 199 | } |
199 | 200 | ||
200 | longest = xfs_alloc_longest_free_extent(mp, pag, | 201 | longest = xfs_alloc_longest_free_extent(mp, pag, |
201 | xfs_alloc_min_freelist(mp, pag)); | 202 | xfs_alloc_min_freelist(mp, pag), |
203 | xfs_ag_resv_needed(pag, XFS_AG_RESV_NONE)); | ||
202 | if (((minlen && longest >= minlen) || | 204 | if (((minlen && longest >= minlen) || |
203 | (!minlen && pag->pagf_freeblks >= minfree)) && | 205 | (!minlen && pag->pagf_freeblks >= minfree)) && |
204 | (!pag->pagf_metadata || !(flags & XFS_PICK_USERDATA) || | 206 | (!pag->pagf_metadata || !(flags & XFS_PICK_USERDATA) || |
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 0b7f986745c1..94ac06f3d908 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c | |||
@@ -553,7 +553,7 @@ xfs_growfs_data_private( | |||
553 | error = xfs_free_extent(tp, | 553 | error = xfs_free_extent(tp, |
554 | XFS_AGB_TO_FSB(mp, agno, | 554 | XFS_AGB_TO_FSB(mp, agno, |
555 | be32_to_cpu(agf->agf_length) - new), | 555 | be32_to_cpu(agf->agf_length) - new), |
556 | new, &oinfo); | 556 | new, &oinfo, XFS_AG_RESV_NONE); |
557 | if (error) | 557 | if (error) |
558 | goto error0; | 558 | goto error0; |
559 | } | 559 | } |
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index efd4a5526f37..041d9493e798 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h | |||
@@ -331,6 +331,22 @@ xfs_mp_fail_writes(struct xfs_mount *mp) | |||
331 | } | 331 | } |
332 | #endif | 332 | #endif |
333 | 333 | ||
334 | /* per-AG block reservation data structures */ | ||
335 | enum xfs_ag_resv_type { | ||
336 | XFS_AG_RESV_NONE = 0, | ||
337 | XFS_AG_RESV_METADATA, | ||
338 | XFS_AG_RESV_AGFL, | ||
339 | }; | ||
340 | |||
341 | struct xfs_ag_resv { | ||
342 | /* number of blocks originally reserved here */ | ||
343 | xfs_extlen_t ar_orig_reserved; | ||
344 | /* number of blocks reserved here */ | ||
345 | xfs_extlen_t ar_reserved; | ||
346 | /* number of blocks originally asked for */ | ||
347 | xfs_extlen_t ar_asked; | ||
348 | }; | ||
349 | |||
334 | /* | 350 | /* |
335 | * Per-ag incore structure, copies of information in agf and agi, to improve the | 351 | * Per-ag incore structure, copies of information in agf and agi, to improve the |
336 | * performance of allocation group selection. | 352 | * performance of allocation group selection. |
@@ -378,8 +394,28 @@ typedef struct xfs_perag { | |||
378 | /* for rcu-safe freeing */ | 394 | /* for rcu-safe freeing */ |
379 | struct rcu_head rcu_head; | 395 | struct rcu_head rcu_head; |
380 | int pagb_count; /* pagb slots in use */ | 396 | int pagb_count; /* pagb slots in use */ |
397 | |||
398 | /* Blocks reserved for all kinds of metadata. */ | ||
399 | struct xfs_ag_resv pag_meta_resv; | ||
400 | /* Blocks reserved for just AGFL-based metadata. */ | ||
401 | struct xfs_ag_resv pag_agfl_resv; | ||
381 | } xfs_perag_t; | 402 | } xfs_perag_t; |
382 | 403 | ||
404 | static inline struct xfs_ag_resv * | ||
405 | xfs_perag_resv( | ||
406 | struct xfs_perag *pag, | ||
407 | enum xfs_ag_resv_type type) | ||
408 | { | ||
409 | switch (type) { | ||
410 | case XFS_AG_RESV_METADATA: | ||
411 | return &pag->pag_meta_resv; | ||
412 | case XFS_AG_RESV_AGFL: | ||
413 | return &pag->pag_agfl_resv; | ||
414 | default: | ||
415 | return NULL; | ||
416 | } | ||
417 | } | ||
418 | |||
383 | extern void xfs_uuid_table_free(void); | 419 | extern void xfs_uuid_table_free(void); |
384 | extern int xfs_log_sbcount(xfs_mount_t *); | 420 | extern int xfs_log_sbcount(xfs_mount_t *); |
385 | extern __uint64_t xfs_default_resblks(xfs_mount_t *mp); | 421 | extern __uint64_t xfs_default_resblks(xfs_mount_t *mp); |
diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c index 2500f28689d5..0432a459871c 100644 --- a/fs/xfs/xfs_rmap_item.c +++ b/fs/xfs/xfs_rmap_item.c | |||
@@ -51,28 +51,16 @@ xfs_rui_item_free( | |||
51 | kmem_zone_free(xfs_rui_zone, ruip); | 51 | kmem_zone_free(xfs_rui_zone, ruip); |
52 | } | 52 | } |
53 | 53 | ||
54 | /* | ||
55 | * This returns the number of iovecs needed to log the given rui item. | ||
56 | * We only need 1 iovec for an rui item. It just logs the rui_log_format | ||
57 | * structure. | ||
58 | */ | ||
59 | static inline int | ||
60 | xfs_rui_item_sizeof( | ||
61 | struct xfs_rui_log_item *ruip) | ||
62 | { | ||
63 | return sizeof(struct xfs_rui_log_format) + | ||
64 | (ruip->rui_format.rui_nextents - 1) * | ||
65 | sizeof(struct xfs_map_extent); | ||
66 | } | ||
67 | |||
68 | STATIC void | 54 | STATIC void |
69 | xfs_rui_item_size( | 55 | xfs_rui_item_size( |
70 | struct xfs_log_item *lip, | 56 | struct xfs_log_item *lip, |
71 | int *nvecs, | 57 | int *nvecs, |
72 | int *nbytes) | 58 | int *nbytes) |
73 | { | 59 | { |
60 | struct xfs_rui_log_item *ruip = RUI_ITEM(lip); | ||
61 | |||
74 | *nvecs += 1; | 62 | *nvecs += 1; |
75 | *nbytes += xfs_rui_item_sizeof(RUI_ITEM(lip)); | 63 | *nbytes += xfs_rui_log_format_sizeof(ruip->rui_format.rui_nextents); |
76 | } | 64 | } |
77 | 65 | ||
78 | /* | 66 | /* |
@@ -97,7 +85,7 @@ xfs_rui_item_format( | |||
97 | ruip->rui_format.rui_size = 1; | 85 | ruip->rui_format.rui_size = 1; |
98 | 86 | ||
99 | xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_RUI_FORMAT, &ruip->rui_format, | 87 | xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_RUI_FORMAT, &ruip->rui_format, |
100 | xfs_rui_item_sizeof(ruip)); | 88 | xfs_rui_log_format_sizeof(ruip->rui_format.rui_nextents)); |
101 | } | 89 | } |
102 | 90 | ||
103 | /* | 91 | /* |
@@ -205,16 +193,12 @@ xfs_rui_init( | |||
205 | 193 | ||
206 | { | 194 | { |
207 | struct xfs_rui_log_item *ruip; | 195 | struct xfs_rui_log_item *ruip; |
208 | uint size; | ||
209 | 196 | ||
210 | ASSERT(nextents > 0); | 197 | ASSERT(nextents > 0); |
211 | if (nextents > XFS_RUI_MAX_FAST_EXTENTS) { | 198 | if (nextents > XFS_RUI_MAX_FAST_EXTENTS) |
212 | size = (uint)(sizeof(struct xfs_rui_log_item) + | 199 | ruip = kmem_zalloc(xfs_rui_log_item_sizeof(nextents), KM_SLEEP); |
213 | ((nextents - 1) * sizeof(struct xfs_map_extent))); | 200 | else |
214 | ruip = kmem_zalloc(size, KM_SLEEP); | ||
215 | } else { | ||
216 | ruip = kmem_zone_zalloc(xfs_rui_zone, KM_SLEEP); | 201 | ruip = kmem_zone_zalloc(xfs_rui_zone, KM_SLEEP); |
217 | } | ||
218 | 202 | ||
219 | xfs_log_item_init(mp, &ruip->rui_item, XFS_LI_RUI, &xfs_rui_item_ops); | 203 | xfs_log_item_init(mp, &ruip->rui_item, XFS_LI_RUI, &xfs_rui_item_ops); |
220 | ruip->rui_format.rui_nextents = nextents; | 204 | ruip->rui_format.rui_nextents = nextents; |
@@ -239,14 +223,12 @@ xfs_rui_copy_format( | |||
239 | uint len; | 223 | uint len; |
240 | 224 | ||
241 | src_rui_fmt = buf->i_addr; | 225 | src_rui_fmt = buf->i_addr; |
242 | len = sizeof(struct xfs_rui_log_format) + | 226 | len = xfs_rui_log_format_sizeof(src_rui_fmt->rui_nextents); |
243 | (src_rui_fmt->rui_nextents - 1) * | ||
244 | sizeof(struct xfs_map_extent); | ||
245 | 227 | ||
246 | if (buf->i_len != len) | 228 | if (buf->i_len != len) |
247 | return -EFSCORRUPTED; | 229 | return -EFSCORRUPTED; |
248 | 230 | ||
249 | memcpy((char *)dst_rui_fmt, (char *)src_rui_fmt, len); | 231 | memcpy(dst_rui_fmt, src_rui_fmt, len); |
250 | return 0; | 232 | return 0; |
251 | } | 233 | } |
252 | 234 | ||
diff --git a/fs/xfs/xfs_rmap_item.h b/fs/xfs/xfs_rmap_item.h index aefcc3a318a5..340c968e1f9c 100644 --- a/fs/xfs/xfs_rmap_item.h +++ b/fs/xfs/xfs_rmap_item.h | |||
@@ -70,6 +70,14 @@ struct xfs_rui_log_item { | |||
70 | struct xfs_rui_log_format rui_format; | 70 | struct xfs_rui_log_format rui_format; |
71 | }; | 71 | }; |
72 | 72 | ||
73 | static inline size_t | ||
74 | xfs_rui_log_item_sizeof( | ||
75 | unsigned int nr) | ||
76 | { | ||
77 | return offsetof(struct xfs_rui_log_item, rui_format) + | ||
78 | xfs_rui_log_format_sizeof(nr); | ||
79 | } | ||
80 | |||
73 | /* | 81 | /* |
74 | * This is the "rmap update done" log item. It is used to log the fact that | 82 | * This is the "rmap update done" log item. It is used to log the fact that |
75 | * some rmapbt updates mentioned in an earlier rui item have been performed. | 83 | * some rmapbt updates mentioned in an earlier rui item have been performed. |
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index fd6be45b3a1e..340975392e91 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c | |||
@@ -1782,9 +1782,8 @@ xfs_init_zones(void) | |||
1782 | if (!xfs_rud_zone) | 1782 | if (!xfs_rud_zone) |
1783 | goto out_destroy_icreate_zone; | 1783 | goto out_destroy_icreate_zone; |
1784 | 1784 | ||
1785 | xfs_rui_zone = kmem_zone_init((sizeof(struct xfs_rui_log_item) + | 1785 | xfs_rui_zone = kmem_zone_init( |
1786 | ((XFS_RUI_MAX_FAST_EXTENTS - 1) * | 1786 | xfs_rui_log_item_sizeof(XFS_RUI_MAX_FAST_EXTENTS), |
1787 | sizeof(struct xfs_map_extent))), | ||
1788 | "xfs_rui_item"); | 1787 | "xfs_rui_item"); |
1789 | if (!xfs_rui_zone) | 1788 | if (!xfs_rui_zone) |
1790 | goto out_destroy_rud_zone; | 1789 | goto out_destroy_rud_zone; |
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index d303a665dba9..c2a875fcf26e 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h | |||
@@ -1570,14 +1570,15 @@ TRACE_EVENT(xfs_agf, | |||
1570 | 1570 | ||
1571 | TRACE_EVENT(xfs_free_extent, | 1571 | TRACE_EVENT(xfs_free_extent, |
1572 | TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno, | 1572 | TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno, |
1573 | xfs_extlen_t len, bool isfl, int haveleft, int haveright), | 1573 | xfs_extlen_t len, enum xfs_ag_resv_type resv, int haveleft, |
1574 | TP_ARGS(mp, agno, agbno, len, isfl, haveleft, haveright), | 1574 | int haveright), |
1575 | TP_ARGS(mp, agno, agbno, len, resv, haveleft, haveright), | ||
1575 | TP_STRUCT__entry( | 1576 | TP_STRUCT__entry( |
1576 | __field(dev_t, dev) | 1577 | __field(dev_t, dev) |
1577 | __field(xfs_agnumber_t, agno) | 1578 | __field(xfs_agnumber_t, agno) |
1578 | __field(xfs_agblock_t, agbno) | 1579 | __field(xfs_agblock_t, agbno) |
1579 | __field(xfs_extlen_t, len) | 1580 | __field(xfs_extlen_t, len) |
1580 | __field(int, isfl) | 1581 | __field(int, resv) |
1581 | __field(int, haveleft) | 1582 | __field(int, haveleft) |
1582 | __field(int, haveright) | 1583 | __field(int, haveright) |
1583 | ), | 1584 | ), |
@@ -1586,16 +1587,16 @@ TRACE_EVENT(xfs_free_extent, | |||
1586 | __entry->agno = agno; | 1587 | __entry->agno = agno; |
1587 | __entry->agbno = agbno; | 1588 | __entry->agbno = agbno; |
1588 | __entry->len = len; | 1589 | __entry->len = len; |
1589 | __entry->isfl = isfl; | 1590 | __entry->resv = resv; |
1590 | __entry->haveleft = haveleft; | 1591 | __entry->haveleft = haveleft; |
1591 | __entry->haveright = haveright; | 1592 | __entry->haveright = haveright; |
1592 | ), | 1593 | ), |
1593 | TP_printk("dev %d:%d agno %u agbno %u len %u isfl %d %s", | 1594 | TP_printk("dev %d:%d agno %u agbno %u len %u resv %d %s", |
1594 | MAJOR(__entry->dev), MINOR(__entry->dev), | 1595 | MAJOR(__entry->dev), MINOR(__entry->dev), |
1595 | __entry->agno, | 1596 | __entry->agno, |
1596 | __entry->agbno, | 1597 | __entry->agbno, |
1597 | __entry->len, | 1598 | __entry->len, |
1598 | __entry->isfl, | 1599 | __entry->resv, |
1599 | __entry->haveleft ? | 1600 | __entry->haveleft ? |
1600 | (__entry->haveright ? "both" : "left") : | 1601 | (__entry->haveright ? "both" : "left") : |
1601 | (__entry->haveright ? "right" : "none")) | 1602 | (__entry->haveright ? "right" : "none")) |
@@ -1622,7 +1623,7 @@ DECLARE_EVENT_CLASS(xfs_alloc_class, | |||
1622 | __field(short, otype) | 1623 | __field(short, otype) |
1623 | __field(char, wasdel) | 1624 | __field(char, wasdel) |
1624 | __field(char, wasfromfl) | 1625 | __field(char, wasfromfl) |
1625 | __field(char, isfl) | 1626 | __field(int, resv) |
1626 | __field(char, userdata) | 1627 | __field(char, userdata) |
1627 | __field(xfs_fsblock_t, firstblock) | 1628 | __field(xfs_fsblock_t, firstblock) |
1628 | ), | 1629 | ), |
@@ -1643,13 +1644,13 @@ DECLARE_EVENT_CLASS(xfs_alloc_class, | |||
1643 | __entry->otype = args->otype; | 1644 | __entry->otype = args->otype; |
1644 | __entry->wasdel = args->wasdel; | 1645 | __entry->wasdel = args->wasdel; |
1645 | __entry->wasfromfl = args->wasfromfl; | 1646 | __entry->wasfromfl = args->wasfromfl; |
1646 | __entry->isfl = args->isfl; | 1647 | __entry->resv = args->resv; |
1647 | __entry->userdata = args->userdata; | 1648 | __entry->userdata = args->userdata; |
1648 | __entry->firstblock = args->firstblock; | 1649 | __entry->firstblock = args->firstblock; |
1649 | ), | 1650 | ), |
1650 | TP_printk("dev %d:%d agno %u agbno %u minlen %u maxlen %u mod %u " | 1651 | TP_printk("dev %d:%d agno %u agbno %u minlen %u maxlen %u mod %u " |
1651 | "prod %u minleft %u total %u alignment %u minalignslop %u " | 1652 | "prod %u minleft %u total %u alignment %u minalignslop %u " |
1652 | "len %u type %s otype %s wasdel %d wasfromfl %d isfl %d " | 1653 | "len %u type %s otype %s wasdel %d wasfromfl %d resv %d " |
1653 | "userdata %d firstblock 0x%llx", | 1654 | "userdata %d firstblock 0x%llx", |
1654 | MAJOR(__entry->dev), MINOR(__entry->dev), | 1655 | MAJOR(__entry->dev), MINOR(__entry->dev), |
1655 | __entry->agno, | 1656 | __entry->agno, |
@@ -1667,7 +1668,7 @@ DECLARE_EVENT_CLASS(xfs_alloc_class, | |||
1667 | __print_symbolic(__entry->otype, XFS_ALLOC_TYPES), | 1668 | __print_symbolic(__entry->otype, XFS_ALLOC_TYPES), |
1668 | __entry->wasdel, | 1669 | __entry->wasdel, |
1669 | __entry->wasfromfl, | 1670 | __entry->wasfromfl, |
1670 | __entry->isfl, | 1671 | __entry->resv, |
1671 | __entry->userdata, | 1672 | __entry->userdata, |
1672 | (unsigned long long)__entry->firstblock) | 1673 | (unsigned long long)__entry->firstblock) |
1673 | ) | 1674 | ) |
@@ -2558,6 +2559,60 @@ DEFINE_RMAPBT_EVENT(xfs_rmap_lookup_le_range_result); | |||
2558 | DEFINE_RMAPBT_EVENT(xfs_rmap_find_right_neighbor_result); | 2559 | DEFINE_RMAPBT_EVENT(xfs_rmap_find_right_neighbor_result); |
2559 | DEFINE_RMAPBT_EVENT(xfs_rmap_find_left_neighbor_result); | 2560 | DEFINE_RMAPBT_EVENT(xfs_rmap_find_left_neighbor_result); |
2560 | 2561 | ||
2562 | /* per-AG reservation */ | ||
2563 | DECLARE_EVENT_CLASS(xfs_ag_resv_class, | ||
2564 | TP_PROTO(struct xfs_perag *pag, enum xfs_ag_resv_type resv, | ||
2565 | xfs_extlen_t len), | ||
2566 | TP_ARGS(pag, resv, len), | ||
2567 | TP_STRUCT__entry( | ||
2568 | __field(dev_t, dev) | ||
2569 | __field(xfs_agnumber_t, agno) | ||
2570 | __field(int, resv) | ||
2571 | __field(xfs_extlen_t, freeblks) | ||
2572 | __field(xfs_extlen_t, flcount) | ||
2573 | __field(xfs_extlen_t, reserved) | ||
2574 | __field(xfs_extlen_t, asked) | ||
2575 | __field(xfs_extlen_t, len) | ||
2576 | ), | ||
2577 | TP_fast_assign( | ||
2578 | struct xfs_ag_resv *r = xfs_perag_resv(pag, resv); | ||
2579 | |||
2580 | __entry->dev = pag->pag_mount->m_super->s_dev; | ||
2581 | __entry->agno = pag->pag_agno; | ||
2582 | __entry->resv = resv; | ||
2583 | __entry->freeblks = pag->pagf_freeblks; | ||
2584 | __entry->flcount = pag->pagf_flcount; | ||
2585 | __entry->reserved = r ? r->ar_reserved : 0; | ||
2586 | __entry->asked = r ? r->ar_asked : 0; | ||
2587 | __entry->len = len; | ||
2588 | ), | ||
2589 | TP_printk("dev %d:%d agno %u resv %d freeblks %u flcount %u resv %u ask %u len %u\n", | ||
2590 | MAJOR(__entry->dev), MINOR(__entry->dev), | ||
2591 | __entry->agno, | ||
2592 | __entry->resv, | ||
2593 | __entry->freeblks, | ||
2594 | __entry->flcount, | ||
2595 | __entry->reserved, | ||
2596 | __entry->asked, | ||
2597 | __entry->len) | ||
2598 | ) | ||
2599 | #define DEFINE_AG_RESV_EVENT(name) \ | ||
2600 | DEFINE_EVENT(xfs_ag_resv_class, name, \ | ||
2601 | TP_PROTO(struct xfs_perag *pag, enum xfs_ag_resv_type type, \ | ||
2602 | xfs_extlen_t len), \ | ||
2603 | TP_ARGS(pag, type, len)) | ||
2604 | |||
2605 | /* per-AG reservation tracepoints */ | ||
2606 | DEFINE_AG_RESV_EVENT(xfs_ag_resv_init); | ||
2607 | DEFINE_AG_RESV_EVENT(xfs_ag_resv_free); | ||
2608 | DEFINE_AG_RESV_EVENT(xfs_ag_resv_alloc_extent); | ||
2609 | DEFINE_AG_RESV_EVENT(xfs_ag_resv_free_extent); | ||
2610 | DEFINE_AG_RESV_EVENT(xfs_ag_resv_critical); | ||
2611 | DEFINE_AG_RESV_EVENT(xfs_ag_resv_needed); | ||
2612 | |||
2613 | DEFINE_AG_ERROR_EVENT(xfs_ag_resv_free_error); | ||
2614 | DEFINE_AG_ERROR_EVENT(xfs_ag_resv_init_error); | ||
2615 | |||
2561 | #endif /* _TRACE_XFS_H */ | 2616 | #endif /* _TRACE_XFS_H */ |
2562 | 2617 | ||
2563 | #undef TRACE_INCLUDE_PATH | 2618 | #undef TRACE_INCLUDE_PATH |
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 836eb807aa88..70f42ea86dfb 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c | |||
@@ -318,7 +318,6 @@ xfs_trans_mod_sb( | |||
318 | * in-core superblock's counter. This should only | 318 | * in-core superblock's counter. This should only |
319 | * be applied to the on-disk superblock. | 319 | * be applied to the on-disk superblock. |
320 | */ | 320 | */ |
321 | ASSERT(delta < 0); | ||
322 | tp->t_res_fdblocks_delta += delta; | 321 | tp->t_res_fdblocks_delta += delta; |
323 | if (xfs_sb_version_haslazysbcount(&mp->m_sb)) | 322 | if (xfs_sb_version_haslazysbcount(&mp->m_sb)) |
324 | flags &= ~XFS_TRANS_SB_DIRTY; | 323 | flags &= ~XFS_TRANS_SB_DIRTY; |
diff --git a/fs/xfs/xfs_trans_extfree.c b/fs/xfs/xfs_trans_extfree.c index 459ddec137a4..ab438647592a 100644 --- a/fs/xfs/xfs_trans_extfree.c +++ b/fs/xfs/xfs_trans_extfree.c | |||
@@ -79,7 +79,8 @@ xfs_trans_free_extent( | |||
79 | 79 | ||
80 | trace_xfs_bmap_free_deferred(tp->t_mountp, agno, 0, agbno, ext_len); | 80 | trace_xfs_bmap_free_deferred(tp->t_mountp, agno, 0, agbno, ext_len); |
81 | 81 | ||
82 | error = xfs_free_extent(tp, start_block, ext_len, oinfo); | 82 | error = xfs_free_extent(tp, start_block, ext_len, oinfo, |
83 | XFS_AG_RESV_NONE); | ||
83 | 84 | ||
84 | /* | 85 | /* |
85 | * Mark the transaction dirty, even on error. This ensures the | 86 | * Mark the transaction dirty, even on error. This ensures the |