aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs/xfs_trans.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/xfs_trans.c')
-rw-r--r--fs/xfs/xfs_trans.c1385
1 files changed, 913 insertions, 472 deletions
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index f73e358bae8d..fdca7416c754 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -1,5 +1,6 @@
1/* 1/*
2 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. 2 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
3 * Copyright (C) 2010 Red Hat, Inc.
3 * All Rights Reserved. 4 * All Rights Reserved.
4 * 5 *
5 * This program is free software; you can redistribute it and/or 6 * This program is free software; you can redistribute it and/or
@@ -24,16 +25,12 @@
24#include "xfs_trans.h" 25#include "xfs_trans.h"
25#include "xfs_sb.h" 26#include "xfs_sb.h"
26#include "xfs_ag.h" 27#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h" 28#include "xfs_mount.h"
30#include "xfs_error.h" 29#include "xfs_error.h"
31#include "xfs_da_btree.h" 30#include "xfs_da_btree.h"
32#include "xfs_bmap_btree.h" 31#include "xfs_bmap_btree.h"
33#include "xfs_alloc_btree.h" 32#include "xfs_alloc_btree.h"
34#include "xfs_ialloc_btree.h" 33#include "xfs_ialloc_btree.h"
35#include "xfs_dir2_sf.h"
36#include "xfs_attr_sf.h"
37#include "xfs_dinode.h" 34#include "xfs_dinode.h"
38#include "xfs_inode.h" 35#include "xfs_inode.h"
39#include "xfs_btree.h" 36#include "xfs_btree.h"
@@ -44,148 +41,494 @@
44#include "xfs_trans_priv.h" 41#include "xfs_trans_priv.h"
45#include "xfs_trans_space.h" 42#include "xfs_trans_space.h"
46#include "xfs_inode_item.h" 43#include "xfs_inode_item.h"
47 44#include "xfs_trace.h"
48
49STATIC void xfs_trans_apply_sb_deltas(xfs_trans_t *);
50STATIC uint xfs_trans_count_vecs(xfs_trans_t *);
51STATIC void xfs_trans_fill_vecs(xfs_trans_t *, xfs_log_iovec_t *);
52STATIC void xfs_trans_uncommit(xfs_trans_t *, uint);
53STATIC void xfs_trans_committed(xfs_trans_t *, int);
54STATIC void xfs_trans_chunk_committed(xfs_log_item_chunk_t *, xfs_lsn_t, int);
55STATIC void xfs_trans_free(xfs_trans_t *);
56 45
57kmem_zone_t *xfs_trans_zone; 46kmem_zone_t *xfs_trans_zone;
47kmem_zone_t *xfs_log_item_desc_zone;
58 48
59 49
60/* 50/*
61 * Reservation functions here avoid a huge stack in xfs_trans_init 51 * Various log reservation values.
62 * due to register overflow from temporaries in the calculations. 52 *
53 * These are based on the size of the file system block because that is what
54 * most transactions manipulate. Each adds in an additional 128 bytes per
55 * item logged to try to account for the overhead of the transaction mechanism.
56 *
57 * Note: Most of the reservations underestimate the number of allocation
58 * groups into which they could free extents in the xfs_bmap_finish() call.
59 * This is because the number in the worst case is quite high and quite
60 * unusual. In order to fix this we need to change xfs_bmap_finish() to free
61 * extents in only a single AG at a time. This will require changes to the
62 * EFI code as well, however, so that the EFI for the extents not freed is
63 * logged again in each transaction. See SGI PV #261917.
64 *
65 * Reservation functions here avoid a huge stack in xfs_trans_init due to
66 * register overflow from temporaries in the calculations.
63 */ 67 */
64 68
69
70/*
71 * In a write transaction we can allocate a maximum of 2
72 * extents. This gives:
73 * the inode getting the new extents: inode size
74 * the inode's bmap btree: max depth * block size
75 * the agfs of the ags from which the extents are allocated: 2 * sector
76 * the superblock free block counter: sector size
77 * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
78 * And the bmap_finish transaction can free bmap blocks in a join:
79 * the agfs of the ags containing the blocks: 2 * sector size
80 * the agfls of the ags containing the blocks: 2 * sector size
81 * the super block free block counter: sector size
82 * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
83 */
65STATIC uint 84STATIC uint
66xfs_calc_write_reservation(xfs_mount_t *mp) 85xfs_calc_write_reservation(
86 struct xfs_mount *mp)
67{ 87{
68 return XFS_CALC_WRITE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); 88 return XFS_DQUOT_LOGRES(mp) +
89 MAX((mp->m_sb.sb_inodesize +
90 XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)) +
91 2 * mp->m_sb.sb_sectsize +
92 mp->m_sb.sb_sectsize +
93 XFS_ALLOCFREE_LOG_RES(mp, 2) +
94 128 * (4 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) +
95 XFS_ALLOCFREE_LOG_COUNT(mp, 2))),
96 (2 * mp->m_sb.sb_sectsize +
97 2 * mp->m_sb.sb_sectsize +
98 mp->m_sb.sb_sectsize +
99 XFS_ALLOCFREE_LOG_RES(mp, 2) +
100 128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))));
69} 101}
70 102
103/*
104 * In truncating a file we free up to two extents at once. We can modify:
105 * the inode being truncated: inode size
106 * the inode's bmap btree: (max depth + 1) * block size
107 * And the bmap_finish transaction can free the blocks and bmap blocks:
108 * the agf for each of the ags: 4 * sector size
109 * the agfl for each of the ags: 4 * sector size
110 * the super block to reflect the freed blocks: sector size
111 * worst case split in allocation btrees per extent assuming 4 extents:
112 * 4 exts * 2 trees * (2 * max depth - 1) * block size
113 * the inode btree: max depth * blocksize
114 * the allocation btrees: 2 trees * (max depth - 1) * block size
115 */
71STATIC uint 116STATIC uint
72xfs_calc_itruncate_reservation(xfs_mount_t *mp) 117xfs_calc_itruncate_reservation(
118 struct xfs_mount *mp)
73{ 119{
74 return XFS_CALC_ITRUNCATE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); 120 return XFS_DQUOT_LOGRES(mp) +
121 MAX((mp->m_sb.sb_inodesize +
122 XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1) +
123 128 * (2 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK))),
124 (4 * mp->m_sb.sb_sectsize +
125 4 * mp->m_sb.sb_sectsize +
126 mp->m_sb.sb_sectsize +
127 XFS_ALLOCFREE_LOG_RES(mp, 4) +
128 128 * (9 + XFS_ALLOCFREE_LOG_COUNT(mp, 4)) +
129 128 * 5 +
130 XFS_ALLOCFREE_LOG_RES(mp, 1) +
131 128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels +
132 XFS_ALLOCFREE_LOG_COUNT(mp, 1))));
75} 133}
76 134
135/*
136 * In renaming a file we can modify:
137 * the four inodes involved: 4 * inode size
138 * the two directory btrees: 2 * (max depth + v2) * dir block size
139 * the two directory bmap btrees: 2 * max depth * block size
140 * And the bmap_finish transaction can free dir and bmap blocks (two sets
141 * of bmap blocks) giving:
142 * the agf for the ags in which the blocks live: 3 * sector size
143 * the agfl for the ags in which the blocks live: 3 * sector size
144 * the superblock for the free block count: sector size
145 * the allocation btrees: 3 exts * 2 trees * (2 * max depth - 1) * block size
146 */
77STATIC uint 147STATIC uint
78xfs_calc_rename_reservation(xfs_mount_t *mp) 148xfs_calc_rename_reservation(
149 struct xfs_mount *mp)
79{ 150{
80 return XFS_CALC_RENAME_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); 151 return XFS_DQUOT_LOGRES(mp) +
152 MAX((4 * mp->m_sb.sb_inodesize +
153 2 * XFS_DIROP_LOG_RES(mp) +
154 128 * (4 + 2 * XFS_DIROP_LOG_COUNT(mp))),
155 (3 * mp->m_sb.sb_sectsize +
156 3 * mp->m_sb.sb_sectsize +
157 mp->m_sb.sb_sectsize +
158 XFS_ALLOCFREE_LOG_RES(mp, 3) +
159 128 * (7 + XFS_ALLOCFREE_LOG_COUNT(mp, 3))));
81} 160}
82 161
162/*
163 * For creating a link to an inode:
164 * the parent directory inode: inode size
165 * the linked inode: inode size
166 * the directory btree could split: (max depth + v2) * dir block size
167 * the directory bmap btree could join or split: (max depth + v2) * blocksize
168 * And the bmap_finish transaction can free some bmap blocks giving:
169 * the agf for the ag in which the blocks live: sector size
170 * the agfl for the ag in which the blocks live: sector size
171 * the superblock for the free block count: sector size
172 * the allocation btrees: 2 trees * (2 * max depth - 1) * block size
173 */
83STATIC uint 174STATIC uint
84xfs_calc_link_reservation(xfs_mount_t *mp) 175xfs_calc_link_reservation(
176 struct xfs_mount *mp)
85{ 177{
86 return XFS_CALC_LINK_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); 178 return XFS_DQUOT_LOGRES(mp) +
179 MAX((mp->m_sb.sb_inodesize +
180 mp->m_sb.sb_inodesize +
181 XFS_DIROP_LOG_RES(mp) +
182 128 * (2 + XFS_DIROP_LOG_COUNT(mp))),
183 (mp->m_sb.sb_sectsize +
184 mp->m_sb.sb_sectsize +
185 mp->m_sb.sb_sectsize +
186 XFS_ALLOCFREE_LOG_RES(mp, 1) +
187 128 * (3 + XFS_ALLOCFREE_LOG_COUNT(mp, 1))));
87} 188}
88 189
190/*
191 * For removing a directory entry we can modify:
192 * the parent directory inode: inode size
193 * the removed inode: inode size
194 * the directory btree could join: (max depth + v2) * dir block size
195 * the directory bmap btree could join or split: (max depth + v2) * blocksize
196 * And the bmap_finish transaction can free the dir and bmap blocks giving:
197 * the agf for the ag in which the blocks live: 2 * sector size
198 * the agfl for the ag in which the blocks live: 2 * sector size
199 * the superblock for the free block count: sector size
200 * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
201 */
89STATIC uint 202STATIC uint
90xfs_calc_remove_reservation(xfs_mount_t *mp) 203xfs_calc_remove_reservation(
204 struct xfs_mount *mp)
91{ 205{
92 return XFS_CALC_REMOVE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); 206 return XFS_DQUOT_LOGRES(mp) +
207 MAX((mp->m_sb.sb_inodesize +
208 mp->m_sb.sb_inodesize +
209 XFS_DIROP_LOG_RES(mp) +
210 128 * (2 + XFS_DIROP_LOG_COUNT(mp))),
211 (2 * mp->m_sb.sb_sectsize +
212 2 * mp->m_sb.sb_sectsize +
213 mp->m_sb.sb_sectsize +
214 XFS_ALLOCFREE_LOG_RES(mp, 2) +
215 128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))));
93} 216}
94 217
218/*
219 * For symlink we can modify:
220 * the parent directory inode: inode size
221 * the new inode: inode size
222 * the inode btree entry: 1 block
223 * the directory btree: (max depth + v2) * dir block size
224 * the directory inode's bmap btree: (max depth + v2) * block size
225 * the blocks for the symlink: 1 kB
226 * Or in the first xact we allocate some inodes giving:
227 * the agi and agf of the ag getting the new inodes: 2 * sectorsize
228 * the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize
229 * the inode btree: max depth * blocksize
230 * the allocation btrees: 2 trees * (2 * max depth - 1) * block size
231 */
95STATIC uint 232STATIC uint
96xfs_calc_symlink_reservation(xfs_mount_t *mp) 233xfs_calc_symlink_reservation(
234 struct xfs_mount *mp)
97{ 235{
98 return XFS_CALC_SYMLINK_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); 236 return XFS_DQUOT_LOGRES(mp) +
237 MAX((mp->m_sb.sb_inodesize +
238 mp->m_sb.sb_inodesize +
239 XFS_FSB_TO_B(mp, 1) +
240 XFS_DIROP_LOG_RES(mp) +
241 1024 +
242 128 * (4 + XFS_DIROP_LOG_COUNT(mp))),
243 (2 * mp->m_sb.sb_sectsize +
244 XFS_FSB_TO_B(mp, XFS_IALLOC_BLOCKS(mp)) +
245 XFS_FSB_TO_B(mp, mp->m_in_maxlevels) +
246 XFS_ALLOCFREE_LOG_RES(mp, 1) +
247 128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels +
248 XFS_ALLOCFREE_LOG_COUNT(mp, 1))));
99} 249}
100 250
251/*
252 * For create we can modify:
253 * the parent directory inode: inode size
254 * the new inode: inode size
255 * the inode btree entry: block size
256 * the superblock for the nlink flag: sector size
257 * the directory btree: (max depth + v2) * dir block size
258 * the directory inode's bmap btree: (max depth + v2) * block size
259 * Or in the first xact we allocate some inodes giving:
260 * the agi and agf of the ag getting the new inodes: 2 * sectorsize
261 * the superblock for the nlink flag: sector size
262 * the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize
263 * the inode btree: max depth * blocksize
264 * the allocation btrees: 2 trees * (max depth - 1) * block size
265 */
101STATIC uint 266STATIC uint
102xfs_calc_create_reservation(xfs_mount_t *mp) 267xfs_calc_create_reservation(
268 struct xfs_mount *mp)
103{ 269{
104 return XFS_CALC_CREATE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); 270 return XFS_DQUOT_LOGRES(mp) +
271 MAX((mp->m_sb.sb_inodesize +
272 mp->m_sb.sb_inodesize +
273 mp->m_sb.sb_sectsize +
274 XFS_FSB_TO_B(mp, 1) +
275 XFS_DIROP_LOG_RES(mp) +
276 128 * (3 + XFS_DIROP_LOG_COUNT(mp))),
277 (3 * mp->m_sb.sb_sectsize +
278 XFS_FSB_TO_B(mp, XFS_IALLOC_BLOCKS(mp)) +
279 XFS_FSB_TO_B(mp, mp->m_in_maxlevels) +
280 XFS_ALLOCFREE_LOG_RES(mp, 1) +
281 128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels +
282 XFS_ALLOCFREE_LOG_COUNT(mp, 1))));
105} 283}
106 284
285/*
286 * Making a new directory is the same as creating a new file.
287 */
107STATIC uint 288STATIC uint
108xfs_calc_mkdir_reservation(xfs_mount_t *mp) 289xfs_calc_mkdir_reservation(
290 struct xfs_mount *mp)
109{ 291{
110 return XFS_CALC_MKDIR_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); 292 return xfs_calc_create_reservation(mp);
111} 293}
112 294
295/*
296 * In freeing an inode we can modify:
297 * the inode being freed: inode size
298 * the super block free inode counter: sector size
299 * the agi hash list and counters: sector size
300 * the inode btree entry: block size
301 * the on disk inode before ours in the agi hash list: inode cluster size
302 * the inode btree: max depth * blocksize
303 * the allocation btrees: 2 trees * (max depth - 1) * block size
304 */
113STATIC uint 305STATIC uint
114xfs_calc_ifree_reservation(xfs_mount_t *mp) 306xfs_calc_ifree_reservation(
307 struct xfs_mount *mp)
115{ 308{
116 return XFS_CALC_IFREE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); 309 return XFS_DQUOT_LOGRES(mp) +
310 mp->m_sb.sb_inodesize +
311 mp->m_sb.sb_sectsize +
312 mp->m_sb.sb_sectsize +
313 XFS_FSB_TO_B(mp, 1) +
314 MAX((__uint16_t)XFS_FSB_TO_B(mp, 1),
315 XFS_INODE_CLUSTER_SIZE(mp)) +
316 128 * 5 +
317 XFS_ALLOCFREE_LOG_RES(mp, 1) +
318 128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels +
319 XFS_ALLOCFREE_LOG_COUNT(mp, 1));
117} 320}
118 321
322/*
323 * When only changing the inode we log the inode and possibly the superblock.
324 * We also add a bit of slop for the transaction stuff.
325 */
119STATIC uint 326STATIC uint
120xfs_calc_ichange_reservation(xfs_mount_t *mp) 327xfs_calc_ichange_reservation(
328 struct xfs_mount *mp)
121{ 329{
122 return XFS_CALC_ICHANGE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); 330 return XFS_DQUOT_LOGRES(mp) +
331 mp->m_sb.sb_inodesize +
332 mp->m_sb.sb_sectsize +
333 512;
334
123} 335}
124 336
337/*
338 * Growing the data section of the filesystem.
339 * superblock
340 * agi and agf
341 * allocation btrees
342 */
125STATIC uint 343STATIC uint
126xfs_calc_growdata_reservation(xfs_mount_t *mp) 344xfs_calc_growdata_reservation(
345 struct xfs_mount *mp)
127{ 346{
128 return XFS_CALC_GROWDATA_LOG_RES(mp); 347 return mp->m_sb.sb_sectsize * 3 +
348 XFS_ALLOCFREE_LOG_RES(mp, 1) +
349 128 * (3 + XFS_ALLOCFREE_LOG_COUNT(mp, 1));
129} 350}
130 351
352/*
353 * Growing the rt section of the filesystem.
354 * In the first set of transactions (ALLOC) we allocate space to the
355 * bitmap or summary files.
356 * superblock: sector size
357 * agf of the ag from which the extent is allocated: sector size
358 * bmap btree for bitmap/summary inode: max depth * blocksize
359 * bitmap/summary inode: inode size
360 * allocation btrees for 1 block alloc: 2 * (2 * maxdepth - 1) * blocksize
361 */
131STATIC uint 362STATIC uint
132xfs_calc_growrtalloc_reservation(xfs_mount_t *mp) 363xfs_calc_growrtalloc_reservation(
364 struct xfs_mount *mp)
133{ 365{
134 return XFS_CALC_GROWRTALLOC_LOG_RES(mp); 366 return 2 * mp->m_sb.sb_sectsize +
367 XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)) +
368 mp->m_sb.sb_inodesize +
369 XFS_ALLOCFREE_LOG_RES(mp, 1) +
370 128 * (3 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) +
371 XFS_ALLOCFREE_LOG_COUNT(mp, 1));
135} 372}
136 373
374/*
375 * Growing the rt section of the filesystem.
376 * In the second set of transactions (ZERO) we zero the new metadata blocks.
377 * one bitmap/summary block: blocksize
378 */
137STATIC uint 379STATIC uint
138xfs_calc_growrtzero_reservation(xfs_mount_t *mp) 380xfs_calc_growrtzero_reservation(
381 struct xfs_mount *mp)
139{ 382{
140 return XFS_CALC_GROWRTZERO_LOG_RES(mp); 383 return mp->m_sb.sb_blocksize + 128;
141} 384}
142 385
386/*
387 * Growing the rt section of the filesystem.
388 * In the third set of transactions (FREE) we update metadata without
389 * allocating any new blocks.
390 * superblock: sector size
391 * bitmap inode: inode size
392 * summary inode: inode size
393 * one bitmap block: blocksize
394 * summary blocks: new summary size
395 */
143STATIC uint 396STATIC uint
144xfs_calc_growrtfree_reservation(xfs_mount_t *mp) 397xfs_calc_growrtfree_reservation(
398 struct xfs_mount *mp)
145{ 399{
146 return XFS_CALC_GROWRTFREE_LOG_RES(mp); 400 return mp->m_sb.sb_sectsize +
401 2 * mp->m_sb.sb_inodesize +
402 mp->m_sb.sb_blocksize +
403 mp->m_rsumsize +
404 128 * 5;
147} 405}
148 406
407/*
408 * Logging the inode modification timestamp on a synchronous write.
409 * inode
410 */
149STATIC uint 411STATIC uint
150xfs_calc_swrite_reservation(xfs_mount_t *mp) 412xfs_calc_swrite_reservation(
413 struct xfs_mount *mp)
151{ 414{
152 return XFS_CALC_SWRITE_LOG_RES(mp); 415 return mp->m_sb.sb_inodesize + 128;
153} 416}
154 417
418/*
419 * Logging the inode mode bits when writing a setuid/setgid file
420 * inode
421 */
155STATIC uint 422STATIC uint
156xfs_calc_writeid_reservation(xfs_mount_t *mp) 423xfs_calc_writeid_reservation(xfs_mount_t *mp)
157{ 424{
158 return XFS_CALC_WRITEID_LOG_RES(mp); 425 return mp->m_sb.sb_inodesize + 128;
159} 426}
160 427
428/*
429 * Converting the inode from non-attributed to attributed.
430 * the inode being converted: inode size
431 * agf block and superblock (for block allocation)
432 * the new block (directory sized)
433 * bmap blocks for the new directory block
434 * allocation btrees
435 */
161STATIC uint 436STATIC uint
162xfs_calc_addafork_reservation(xfs_mount_t *mp) 437xfs_calc_addafork_reservation(
438 struct xfs_mount *mp)
163{ 439{
164 return XFS_CALC_ADDAFORK_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); 440 return XFS_DQUOT_LOGRES(mp) +
441 mp->m_sb.sb_inodesize +
442 mp->m_sb.sb_sectsize * 2 +
443 mp->m_dirblksize +
444 XFS_FSB_TO_B(mp, XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1) +
445 XFS_ALLOCFREE_LOG_RES(mp, 1) +
446 128 * (4 + XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1 +
447 XFS_ALLOCFREE_LOG_COUNT(mp, 1));
165} 448}
166 449
450/*
451 * Removing the attribute fork of a file
452 * the inode being truncated: inode size
453 * the inode's bmap btree: max depth * block size
454 * And the bmap_finish transaction can free the blocks and bmap blocks:
455 * the agf for each of the ags: 4 * sector size
456 * the agfl for each of the ags: 4 * sector size
457 * the super block to reflect the freed blocks: sector size
458 * worst case split in allocation btrees per extent assuming 4 extents:
459 * 4 exts * 2 trees * (2 * max depth - 1) * block size
460 */
167STATIC uint 461STATIC uint
168xfs_calc_attrinval_reservation(xfs_mount_t *mp) 462xfs_calc_attrinval_reservation(
463 struct xfs_mount *mp)
169{ 464{
170 return XFS_CALC_ATTRINVAL_LOG_RES(mp); 465 return MAX((mp->m_sb.sb_inodesize +
466 XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) +
467 128 * (1 + XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK))),
468 (4 * mp->m_sb.sb_sectsize +
469 4 * mp->m_sb.sb_sectsize +
470 mp->m_sb.sb_sectsize +
471 XFS_ALLOCFREE_LOG_RES(mp, 4) +
472 128 * (9 + XFS_ALLOCFREE_LOG_COUNT(mp, 4))));
171} 473}
172 474
475/*
476 * Setting an attribute.
477 * the inode getting the attribute
478 * the superblock for allocations
479 * the agfs extents are allocated from
480 * the attribute btree * max depth
481 * the inode allocation btree
482 * Since attribute transaction space is dependent on the size of the attribute,
483 * the calculation is done partially at mount time and partially at runtime.
484 */
173STATIC uint 485STATIC uint
174xfs_calc_attrset_reservation(xfs_mount_t *mp) 486xfs_calc_attrset_reservation(
487 struct xfs_mount *mp)
175{ 488{
176 return XFS_CALC_ATTRSET_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); 489 return XFS_DQUOT_LOGRES(mp) +
490 mp->m_sb.sb_inodesize +
491 mp->m_sb.sb_sectsize +
492 XFS_FSB_TO_B(mp, XFS_DA_NODE_MAXDEPTH) +
493 128 * (2 + XFS_DA_NODE_MAXDEPTH);
177} 494}
178 495
496/*
497 * Removing an attribute.
498 * the inode: inode size
499 * the attribute btree could join: max depth * block size
500 * the inode bmap btree could join or split: max depth * block size
501 * And the bmap_finish transaction can free the attr blocks, giving:
502 * the agf for the ag in which the blocks live: 2 * sector size
503 * the agfl for the ag in which the blocks live: 2 * sector size
504 * the superblock for the free block count: sector size
505 * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
506 */
179STATIC uint 507STATIC uint
180xfs_calc_attrrm_reservation(xfs_mount_t *mp) 508xfs_calc_attrrm_reservation(
509 struct xfs_mount *mp)
181{ 510{
182 return XFS_CALC_ATTRRM_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp); 511 return XFS_DQUOT_LOGRES(mp) +
512 MAX((mp->m_sb.sb_inodesize +
513 XFS_FSB_TO_B(mp, XFS_DA_NODE_MAXDEPTH) +
514 XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) +
515 128 * (1 + XFS_DA_NODE_MAXDEPTH +
516 XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK))),
517 (2 * mp->m_sb.sb_sectsize +
518 2 * mp->m_sb.sb_sectsize +
519 mp->m_sb.sb_sectsize +
520 XFS_ALLOCFREE_LOG_RES(mp, 2) +
521 128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))));
183} 522}
184 523
524/*
525 * Clearing a bad agino number in an agi hash bucket.
526 */
185STATIC uint 527STATIC uint
186xfs_calc_clear_agi_bucket_reservation(xfs_mount_t *mp) 528xfs_calc_clear_agi_bucket_reservation(
529 struct xfs_mount *mp)
187{ 530{
188 return XFS_CALC_CLEAR_AGI_BUCKET_LOG_RES(mp); 531 return mp->m_sb.sb_sectsize + 128;
189} 532}
190 533
191/* 534/*
@@ -194,11 +537,10 @@ xfs_calc_clear_agi_bucket_reservation(xfs_mount_t *mp)
194 */ 537 */
195void 538void
196xfs_trans_init( 539xfs_trans_init(
197 xfs_mount_t *mp) 540 struct xfs_mount *mp)
198{ 541{
199 xfs_trans_reservations_t *resp; 542 struct xfs_trans_reservations *resp = &mp->m_reservations;
200 543
201 resp = &(mp->m_reservations);
202 resp->tr_write = xfs_calc_write_reservation(mp); 544 resp->tr_write = xfs_calc_write_reservation(mp);
203 resp->tr_itruncate = xfs_calc_itruncate_reservation(mp); 545 resp->tr_itruncate = xfs_calc_itruncate_reservation(mp);
204 resp->tr_rename = xfs_calc_rename_reservation(mp); 546 resp->tr_rename = xfs_calc_rename_reservation(mp);
@@ -253,14 +595,30 @@ _xfs_trans_alloc(
253 tp->t_magic = XFS_TRANS_MAGIC; 595 tp->t_magic = XFS_TRANS_MAGIC;
254 tp->t_type = type; 596 tp->t_type = type;
255 tp->t_mountp = mp; 597 tp->t_mountp = mp;
256 tp->t_items_free = XFS_LIC_NUM_SLOTS; 598 INIT_LIST_HEAD(&tp->t_items);
257 tp->t_busy_free = XFS_LBC_NUM_SLOTS; 599 INIT_LIST_HEAD(&tp->t_busy);
258 xfs_lic_init(&(tp->t_items));
259 XFS_LBC_INIT(&(tp->t_busy));
260 return tp; 600 return tp;
261} 601}
262 602
263/* 603/*
604 * Free the transaction structure. If there is more clean up
605 * to do when the structure is freed, add it here.
606 */
607STATIC void
608xfs_trans_free(
609 struct xfs_trans *tp)
610{
611 struct xfs_busy_extent *busyp, *n;
612
613 list_for_each_entry_safe(busyp, n, &tp->t_busy, list)
614 xfs_alloc_busy_clear(tp->t_mountp, busyp);
615
616 atomic_dec(&tp->t_mountp->m_active_trans);
617 xfs_trans_free_dqinfo(tp);
618 kmem_zone_free(xfs_trans_zone, tp);
619}
620
621/*
264 * This is called to create a new transaction which will share the 622 * This is called to create a new transaction which will share the
265 * permanent log reservation of the given transaction. The remaining 623 * permanent log reservation of the given transaction. The remaining
266 * unused block and rt extent reservations are also inherited. This 624 * unused block and rt extent reservations are also inherited. This
@@ -282,10 +640,8 @@ xfs_trans_dup(
282 ntp->t_magic = XFS_TRANS_MAGIC; 640 ntp->t_magic = XFS_TRANS_MAGIC;
283 ntp->t_type = tp->t_type; 641 ntp->t_type = tp->t_type;
284 ntp->t_mountp = tp->t_mountp; 642 ntp->t_mountp = tp->t_mountp;
285 ntp->t_items_free = XFS_LIC_NUM_SLOTS; 643 INIT_LIST_HEAD(&ntp->t_items);
286 ntp->t_busy_free = XFS_LBC_NUM_SLOTS; 644 INIT_LIST_HEAD(&ntp->t_busy);
287 xfs_lic_init(&(ntp->t_items));
288 XFS_LBC_INIT(&(ntp->t_busy));
289 645
290 ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); 646 ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
291 ASSERT(tp->t_ticket != NULL); 647 ASSERT(tp->t_ticket != NULL);
@@ -421,7 +777,6 @@ undo_blocks:
421 return error; 777 return error;
422} 778}
423 779
424
425/* 780/*
426 * Record the indicated change to the given field for application 781 * Record the indicated change to the given field for application
427 * to the file system's superblock when the transaction commits. 782 * to the file system's superblock when the transaction commits.
@@ -650,7 +1005,7 @@ xfs_trans_apply_sb_deltas(
650 * XFS_TRANS_SB_DIRTY will not be set when the transaction is updated but we 1005 * XFS_TRANS_SB_DIRTY will not be set when the transaction is updated but we
651 * still need to update the incore superblock with the changes. 1006 * still need to update the incore superblock with the changes.
652 */ 1007 */
653STATIC void 1008void
654xfs_trans_unreserve_and_mod_sb( 1009xfs_trans_unreserve_and_mod_sb(
655 xfs_trans_t *tp) 1010 xfs_trans_t *tp)
656{ 1011{
@@ -764,94 +1119,340 @@ xfs_trans_unreserve_and_mod_sb(
764 } 1119 }
765} 1120}
766 1121
767
768/* 1122/*
769 * xfs_trans_commit 1123 * Add the given log item to the transaction's list of log items.
770 * 1124 *
771 * Commit the given transaction to the log a/synchronously. 1125 * The log item will now point to its new descriptor with its li_desc field.
1126 */
1127void
1128xfs_trans_add_item(
1129 struct xfs_trans *tp,
1130 struct xfs_log_item *lip)
1131{
1132 struct xfs_log_item_desc *lidp;
1133
1134 ASSERT(lip->li_mountp = tp->t_mountp);
1135 ASSERT(lip->li_ailp = tp->t_mountp->m_ail);
1136
1137 lidp = kmem_zone_zalloc(xfs_log_item_desc_zone, KM_SLEEP | KM_NOFS);
1138
1139 lidp->lid_item = lip;
1140 lidp->lid_flags = 0;
1141 lidp->lid_size = 0;
1142 list_add_tail(&lidp->lid_trans, &tp->t_items);
1143
1144 lip->li_desc = lidp;
1145}
1146
1147STATIC void
1148xfs_trans_free_item_desc(
1149 struct xfs_log_item_desc *lidp)
1150{
1151 list_del_init(&lidp->lid_trans);
1152 kmem_zone_free(xfs_log_item_desc_zone, lidp);
1153}
1154
1155/*
1156 * Unlink and free the given descriptor.
1157 */
1158void
1159xfs_trans_del_item(
1160 struct xfs_log_item *lip)
1161{
1162 xfs_trans_free_item_desc(lip->li_desc);
1163 lip->li_desc = NULL;
1164}
1165
1166/*
1167 * Unlock all of the items of a transaction and free all the descriptors
1168 * of that transaction.
1169 */
1170STATIC void
1171xfs_trans_free_items(
1172 struct xfs_trans *tp,
1173 xfs_lsn_t commit_lsn,
1174 int flags)
1175{
1176 struct xfs_log_item_desc *lidp, *next;
1177
1178 list_for_each_entry_safe(lidp, next, &tp->t_items, lid_trans) {
1179 struct xfs_log_item *lip = lidp->lid_item;
1180
1181 lip->li_desc = NULL;
1182
1183 if (commit_lsn != NULLCOMMITLSN)
1184 IOP_COMMITTING(lip, commit_lsn);
1185 if (flags & XFS_TRANS_ABORT)
1186 lip->li_flags |= XFS_LI_ABORTED;
1187 IOP_UNLOCK(lip);
1188
1189 xfs_trans_free_item_desc(lidp);
1190 }
1191}
1192
1193/*
1194 * Unlock the items associated with a transaction.
772 * 1195 *
773 * XFS disk error handling mechanism is not based on a typical 1196 * Items which were not logged should be freed. Those which were logged must
774 * transaction abort mechanism. Logically after the filesystem 1197 * still be tracked so they can be unpinned when the transaction commits.
775 * gets marked 'SHUTDOWN', we can't let any new transactions
776 * be durable - ie. committed to disk - because some metadata might
777 * be inconsistent. In such cases, this returns an error, and the
778 * caller may assume that all locked objects joined to the transaction
779 * have already been unlocked as if the commit had succeeded.
780 * Do not reference the transaction structure after this call.
781 */ 1198 */
782 /*ARGSUSED*/ 1199STATIC void
783int 1200xfs_trans_unlock_items(
784_xfs_trans_commit( 1201 struct xfs_trans *tp,
785 xfs_trans_t *tp, 1202 xfs_lsn_t commit_lsn)
786 uint flags,
787 int *log_flushed)
788{ 1203{
789 xfs_log_iovec_t *log_vector; 1204 struct xfs_log_item_desc *lidp, *next;
790 int nvec; 1205
791 xfs_mount_t *mp; 1206 list_for_each_entry_safe(lidp, next, &tp->t_items, lid_trans) {
792 xfs_lsn_t commit_lsn; 1207 struct xfs_log_item *lip = lidp->lid_item;
793 /* REFERENCED */ 1208
794 int error; 1209 lip->li_desc = NULL;
795 int log_flags; 1210
796 int sync; 1211 if (commit_lsn != NULLCOMMITLSN)
797#define XFS_TRANS_LOGVEC_COUNT 16 1212 IOP_COMMITTING(lip, commit_lsn);
798 xfs_log_iovec_t log_vector_fast[XFS_TRANS_LOGVEC_COUNT]; 1213 IOP_UNLOCK(lip);
799 struct xlog_in_core *commit_iclog; 1214
800 int shutdown; 1215 /*
1216 * Free the descriptor if the item is not dirty
1217 * within this transaction.
1218 */
1219 if (!(lidp->lid_flags & XFS_LID_DIRTY))
1220 xfs_trans_free_item_desc(lidp);
1221 }
1222}
1223
1224/*
1225 * Total up the number of log iovecs needed to commit this
1226 * transaction. The transaction itself needs one for the
1227 * transaction header. Ask each dirty item in turn how many
1228 * it needs to get the total.
1229 */
1230static uint
1231xfs_trans_count_vecs(
1232 struct xfs_trans *tp)
1233{
1234 int nvecs;
1235 struct xfs_log_item_desc *lidp;
1236
1237 nvecs = 1;
1238
1239 /* In the non-debug case we need to start bailing out if we
1240 * didn't find a log_item here, return zero and let trans_commit
1241 * deal with it.
1242 */
1243 if (list_empty(&tp->t_items)) {
1244 ASSERT(0);
1245 return 0;
1246 }
1247
1248 list_for_each_entry(lidp, &tp->t_items, lid_trans) {
1249 /*
1250 * Skip items which aren't dirty in this transaction.
1251 */
1252 if (!(lidp->lid_flags & XFS_LID_DIRTY))
1253 continue;
1254 lidp->lid_size = IOP_SIZE(lidp->lid_item);
1255 nvecs += lidp->lid_size;
1256 }
801 1257
802 commit_lsn = -1; 1258 return nvecs;
1259}
1260
1261/*
1262 * Fill in the vector with pointers to data to be logged
1263 * by this transaction. The transaction header takes
1264 * the first vector, and then each dirty item takes the
1265 * number of vectors it indicated it needed in xfs_trans_count_vecs().
1266 *
1267 * As each item fills in the entries it needs, also pin the item
1268 * so that it cannot be flushed out until the log write completes.
1269 */
1270static void
1271xfs_trans_fill_vecs(
1272 struct xfs_trans *tp,
1273 struct xfs_log_iovec *log_vector)
1274{
1275 struct xfs_log_item_desc *lidp;
1276 struct xfs_log_iovec *vecp;
1277 uint nitems;
803 1278
804 /* 1279 /*
805 * Determine whether this commit is releasing a permanent 1280 * Skip over the entry for the transaction header, we'll
806 * log reservation or not. 1281 * fill that in at the end.
807 */ 1282 */
808 if (flags & XFS_TRANS_RELEASE_LOG_RES) { 1283 vecp = log_vector + 1;
809 ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); 1284
810 log_flags = XFS_LOG_REL_PERM_RESERV; 1285 nitems = 0;
811 } else { 1286 ASSERT(!list_empty(&tp->t_items));
812 log_flags = 0; 1287 list_for_each_entry(lidp, &tp->t_items, lid_trans) {
1288 /* Skip items which aren't dirty in this transaction. */
1289 if (!(lidp->lid_flags & XFS_LID_DIRTY))
1290 continue;
1291
1292 /*
1293 * The item may be marked dirty but not log anything. This can
1294 * be used to get called when a transaction is committed.
1295 */
1296 if (lidp->lid_size)
1297 nitems++;
1298 IOP_FORMAT(lidp->lid_item, vecp);
1299 vecp += lidp->lid_size;
1300 IOP_PIN(lidp->lid_item);
813 } 1301 }
814 mp = tp->t_mountp;
815 1302
816 /* 1303 /*
817 * If there is nothing to be logged by the transaction, 1304 * Now that we've counted the number of items in this transaction, fill
818 * then unlock all of the items associated with the 1305 * in the transaction header. Note that the transaction header does not
819 * transaction and free the transaction structure. 1306 * have a log item.
820 * Also make sure to return any reserved blocks to
821 * the free pool.
822 */ 1307 */
823shut_us_down: 1308 tp->t_header.th_magic = XFS_TRANS_HEADER_MAGIC;
824 shutdown = XFS_FORCED_SHUTDOWN(mp) ? EIO : 0; 1309 tp->t_header.th_type = tp->t_type;
825 if (!(tp->t_flags & XFS_TRANS_DIRTY) || shutdown) { 1310 tp->t_header.th_num_items = nitems;
826 xfs_trans_unreserve_and_mod_sb(tp); 1311 log_vector->i_addr = (xfs_caddr_t)&tp->t_header;
1312 log_vector->i_len = sizeof(xfs_trans_header_t);
1313 log_vector->i_type = XLOG_REG_TYPE_TRANSHDR;
1314}
1315
1316/*
1317 * The committed item processing consists of calling the committed routine of
1318 * each logged item, updating the item's position in the AIL if necessary, and
1319 * unpinning each item. If the committed routine returns -1, then do nothing
1320 * further with the item because it may have been freed.
1321 *
1322 * Since items are unlocked when they are copied to the incore log, it is
1323 * possible for two transactions to be completing and manipulating the same
1324 * item simultaneously. The AIL lock will protect the lsn field of each item.
1325 * The value of this field can never go backwards.
1326 *
1327 * We unpin the items after repositioning them in the AIL, because otherwise
1328 * they could be immediately flushed and we'd have to race with the flusher
1329 * trying to pull the item from the AIL as we add it.
1330 */
1331void
1332xfs_trans_item_committed(
1333 struct xfs_log_item *lip,
1334 xfs_lsn_t commit_lsn,
1335 int aborted)
1336{
1337 xfs_lsn_t item_lsn;
1338 struct xfs_ail *ailp;
1339
1340 if (aborted)
1341 lip->li_flags |= XFS_LI_ABORTED;
1342 item_lsn = IOP_COMMITTED(lip, commit_lsn);
1343
1344 /* If the committed routine returns -1, item has been freed. */
1345 if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0)
1346 return;
1347
1348 /*
1349 * If the returned lsn is greater than what it contained before, update
1350 * the location of the item in the AIL. If it is not, then do nothing.
1351 * Items can never move backwards in the AIL.
1352 *
1353 * While the new lsn should usually be greater, it is possible that a
1354 * later transaction completing simultaneously with an earlier one
1355 * using the same item could complete first with a higher lsn. This
1356 * would cause the earlier transaction to fail the test below.
1357 */
1358 ailp = lip->li_ailp;
1359 spin_lock(&ailp->xa_lock);
1360 if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0) {
827 /* 1361 /*
828 * It is indeed possible for the transaction to be 1362 * This will set the item's lsn to item_lsn and update the
829 * not dirty but the dqinfo portion to be. All that 1363 * position of the item in the AIL.
830 * means is that we have some (non-persistent) quota 1364 *
831 * reservations that need to be unreserved. 1365 * xfs_trans_ail_update() drops the AIL lock.
832 */ 1366 */
833 xfs_trans_unreserve_and_mod_dquots(tp); 1367 xfs_trans_ail_update(ailp, lip, item_lsn);
834 if (tp->t_ticket) { 1368 } else {
835 commit_lsn = xfs_log_done(mp, tp->t_ticket, 1369 spin_unlock(&ailp->xa_lock);
836 NULL, log_flags);
837 if (commit_lsn == -1 && !shutdown)
838 shutdown = XFS_ERROR(EIO);
839 }
840 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
841 xfs_trans_free_items(tp, shutdown? XFS_TRANS_ABORT : 0);
842 xfs_trans_free_busy(tp);
843 xfs_trans_free(tp);
844 XFS_STATS_INC(xs_trans_empty);
845 return (shutdown);
846 } 1370 }
847 ASSERT(tp->t_ticket != NULL);
848 1371
849 /* 1372 /*
850 * If we need to update the superblock, then do it now. 1373 * Now that we've repositioned the item in the AIL, unpin it so it can
1374 * be flushed. Pass information about buffer stale state down from the
1375 * log item flags, if anyone else stales the buffer we do not want to
1376 * pay any attention to it.
851 */ 1377 */
852 if (tp->t_flags & XFS_TRANS_SB_DIRTY) 1378 IOP_UNPIN(lip, 0);
853 xfs_trans_apply_sb_deltas(tp); 1379}
854 xfs_trans_apply_dquot_deltas(tp); 1380
1381/*
1382 * This is typically called by the LM when a transaction has been fully
1383 * committed to disk. It needs to unpin the items which have
1384 * been logged by the transaction and update their positions
1385 * in the AIL if necessary.
1386 *
1387 * This also gets called when the transactions didn't get written out
1388 * because of an I/O error. Abortflag & XFS_LI_ABORTED is set then.
1389 */
1390STATIC void
1391xfs_trans_committed(
1392 struct xfs_trans *tp,
1393 int abortflag)
1394{
1395 struct xfs_log_item_desc *lidp, *next;
1396
1397 /* Call the transaction's completion callback if there is one. */
1398 if (tp->t_callback != NULL)
1399 tp->t_callback(tp, tp->t_callarg);
1400
1401 list_for_each_entry_safe(lidp, next, &tp->t_items, lid_trans) {
1402 xfs_trans_item_committed(lidp->lid_item, tp->t_lsn, abortflag);
1403 xfs_trans_free_item_desc(lidp);
1404 }
1405
1406 xfs_trans_free(tp);
1407}
1408
1409/*
1410 * Called from the trans_commit code when we notice that
1411 * the filesystem is in the middle of a forced shutdown.
1412 */
1413STATIC void
1414xfs_trans_uncommit(
1415 struct xfs_trans *tp,
1416 uint flags)
1417{
1418 struct xfs_log_item_desc *lidp;
1419
1420 list_for_each_entry(lidp, &tp->t_items, lid_trans) {
1421 /*
1422 * Unpin all but those that aren't dirty.
1423 */
1424 if (lidp->lid_flags & XFS_LID_DIRTY)
1425 IOP_UNPIN(lidp->lid_item, 1);
1426 }
1427
1428 xfs_trans_unreserve_and_mod_sb(tp);
1429 xfs_trans_unreserve_and_mod_dquots(tp);
1430
1431 xfs_trans_free_items(tp, NULLCOMMITLSN, flags);
1432 xfs_trans_free(tp);
1433}
1434
1435/*
1436 * Format the transaction direct to the iclog. This isolates the physical
1437 * transaction commit operation from the logical operation and hence allows
1438 * other methods to be introduced without affecting the existing commit path.
1439 */
1440static int
1441xfs_trans_commit_iclog(
1442 struct xfs_mount *mp,
1443 struct xfs_trans *tp,
1444 xfs_lsn_t *commit_lsn,
1445 int flags)
1446{
1447 int shutdown;
1448 int error;
1449 int log_flags = 0;
1450 struct xlog_in_core *commit_iclog;
1451#define XFS_TRANS_LOGVEC_COUNT 16
1452 struct xfs_log_iovec log_vector_fast[XFS_TRANS_LOGVEC_COUNT];
1453 struct xfs_log_iovec *log_vector;
1454 uint nvec;
1455
855 1456
856 /* 1457 /*
857 * Ask each log item how many log_vector entries it will 1458 * Ask each log item how many log_vector entries it will
@@ -861,8 +1462,7 @@ shut_us_down:
861 */ 1462 */
862 nvec = xfs_trans_count_vecs(tp); 1463 nvec = xfs_trans_count_vecs(tp);
863 if (nvec == 0) { 1464 if (nvec == 0) {
864 xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR); 1465 return ENOMEM; /* triggers a shutdown! */
865 goto shut_us_down;
866 } else if (nvec <= XFS_TRANS_LOGVEC_COUNT) { 1466 } else if (nvec <= XFS_TRANS_LOGVEC_COUNT) {
867 log_vector = log_vector_fast; 1467 log_vector = log_vector_fast;
868 } else { 1468 } else {
@@ -877,6 +1477,9 @@ shut_us_down:
877 */ 1477 */
878 xfs_trans_fill_vecs(tp, log_vector); 1478 xfs_trans_fill_vecs(tp, log_vector);
879 1479
1480 if (flags & XFS_TRANS_RELEASE_LOG_RES)
1481 log_flags = XFS_LOG_REL_PERM_RESERV;
1482
880 error = xfs_log_write(mp, log_vector, nvec, tp->t_ticket, &(tp->t_lsn)); 1483 error = xfs_log_write(mp, log_vector, nvec, tp->t_ticket, &(tp->t_lsn));
881 1484
882 /* 1485 /*
@@ -884,18 +1487,19 @@ shut_us_down:
884 * at any time after this call. However, all the items associated 1487 * at any time after this call. However, all the items associated
885 * with the transaction are still locked and pinned in memory. 1488 * with the transaction are still locked and pinned in memory.
886 */ 1489 */
887 commit_lsn = xfs_log_done(mp, tp->t_ticket, &commit_iclog, log_flags); 1490 *commit_lsn = xfs_log_done(mp, tp->t_ticket, &commit_iclog, log_flags);
1491
1492 tp->t_commit_lsn = *commit_lsn;
1493 trace_xfs_trans_commit_lsn(tp);
888 1494
889 tp->t_commit_lsn = commit_lsn; 1495 if (nvec > XFS_TRANS_LOGVEC_COUNT)
890 if (nvec > XFS_TRANS_LOGVEC_COUNT) {
891 kmem_free(log_vector); 1496 kmem_free(log_vector);
892 }
893 1497
894 /* 1498 /*
895 * If we got a log write error. Unpin the logitems that we 1499 * If we got a log write error. Unpin the logitems that we
896 * had pinned, clean up, free trans structure, and return error. 1500 * had pinned, clean up, free trans structure, and return error.
897 */ 1501 */
898 if (error || commit_lsn == -1) { 1502 if (error || *commit_lsn == -1) {
899 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); 1503 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
900 xfs_trans_uncommit(tp, flags|XFS_TRANS_ABORT); 1504 xfs_trans_uncommit(tp, flags|XFS_TRANS_ABORT);
901 return XFS_ERROR(EIO); 1505 return XFS_ERROR(EIO);
@@ -909,8 +1513,6 @@ shut_us_down:
909 */ 1513 */
910 xfs_trans_unreserve_and_mod_sb(tp); 1514 xfs_trans_unreserve_and_mod_sb(tp);
911 1515
912 sync = tp->t_flags & XFS_TRANS_SYNC;
913
914 /* 1516 /*
915 * Tell the LM to call the transaction completion routine 1517 * Tell the LM to call the transaction completion routine
916 * when the log write with LSN commit_lsn completes (e.g. 1518 * when the log write with LSN commit_lsn completes (e.g.
@@ -953,7 +1555,7 @@ shut_us_down:
953 * the commit lsn of this transaction for dependency tracking 1555 * the commit lsn of this transaction for dependency tracking
954 * purposes. 1556 * purposes.
955 */ 1557 */
956 xfs_trans_unlock_items(tp, commit_lsn); 1558 xfs_trans_unlock_items(tp, *commit_lsn);
957 1559
958 /* 1560 /*
959 * If we detected a log error earlier, finish committing 1561 * If we detected a log error earlier, finish committing
@@ -973,156 +1575,198 @@ shut_us_down:
973 * and the items are released we can finally allow the iclog to 1575 * and the items are released we can finally allow the iclog to
974 * go to disk. 1576 * go to disk.
975 */ 1577 */
976 error = xfs_log_release_iclog(mp, commit_iclog); 1578 return xfs_log_release_iclog(mp, commit_iclog);
977
978 /*
979 * If the transaction needs to be synchronous, then force the
980 * log out now and wait for it.
981 */
982 if (sync) {
983 if (!error) {
984 error = _xfs_log_force_lsn(mp, commit_lsn,
985 XFS_LOG_SYNC, log_flushed);
986 }
987 XFS_STATS_INC(xs_trans_sync);
988 } else {
989 XFS_STATS_INC(xs_trans_async);
990 }
991
992 return (error);
993} 1579}
994 1580
995
996/* 1581/*
997 * Total up the number of log iovecs needed to commit this 1582 * Walk the log items and allocate log vector structures for
998 * transaction. The transaction itself needs one for the 1583 * each item large enough to fit all the vectors they require.
999 * transaction header. Ask each dirty item in turn how many 1584 * Note that this format differs from the old log vector format in
1000 * it needs to get the total. 1585 * that there is no transaction header in these log vectors.
1001 */ 1586 */
1002STATIC uint 1587STATIC struct xfs_log_vec *
1003xfs_trans_count_vecs( 1588xfs_trans_alloc_log_vecs(
1004 xfs_trans_t *tp) 1589 xfs_trans_t *tp)
1005{ 1590{
1006 int nvecs; 1591 struct xfs_log_item_desc *lidp;
1007 xfs_log_item_desc_t *lidp; 1592 struct xfs_log_vec *lv = NULL;
1593 struct xfs_log_vec *ret_lv = NULL;
1008 1594
1009 nvecs = 1;
1010 lidp = xfs_trans_first_item(tp);
1011 ASSERT(lidp != NULL);
1012 1595
1013 /* In the non-debug case we need to start bailing out if we 1596 /* Bail out if we didn't find a log item. */
1014 * didn't find a log_item here, return zero and let trans_commit 1597 if (list_empty(&tp->t_items)) {
1015 * deal with it. 1598 ASSERT(0);
1016 */ 1599 return NULL;
1017 if (lidp == NULL) 1600 }
1018 return 0;
1019 1601
1020 while (lidp != NULL) { 1602 list_for_each_entry(lidp, &tp->t_items, lid_trans) {
1021 /* 1603 struct xfs_log_vec *new_lv;
1022 * Skip items which aren't dirty in this transaction. 1604
1023 */ 1605 /* Skip items which aren't dirty in this transaction. */
1024 if (!(lidp->lid_flags & XFS_LID_DIRTY)) { 1606 if (!(lidp->lid_flags & XFS_LID_DIRTY))
1025 lidp = xfs_trans_next_item(tp, lidp);
1026 continue; 1607 continue;
1027 } 1608
1609 /* Skip items that do not have any vectors for writing */
1028 lidp->lid_size = IOP_SIZE(lidp->lid_item); 1610 lidp->lid_size = IOP_SIZE(lidp->lid_item);
1029 nvecs += lidp->lid_size; 1611 if (!lidp->lid_size)
1030 lidp = xfs_trans_next_item(tp, lidp); 1612 continue;
1613
1614 new_lv = kmem_zalloc(sizeof(*new_lv) +
1615 lidp->lid_size * sizeof(struct xfs_log_iovec),
1616 KM_SLEEP);
1617
1618 /* The allocated iovec region lies beyond the log vector. */
1619 new_lv->lv_iovecp = (struct xfs_log_iovec *)&new_lv[1];
1620 new_lv->lv_niovecs = lidp->lid_size;
1621 new_lv->lv_item = lidp->lid_item;
1622 if (!ret_lv)
1623 ret_lv = new_lv;
1624 else
1625 lv->lv_next = new_lv;
1626 lv = new_lv;
1031 } 1627 }
1032 1628
1033 return nvecs; 1629 return ret_lv;
1034} 1630}
1035 1631
1036/* 1632static int
1037 * Called from the trans_commit code when we notice that 1633xfs_trans_commit_cil(
1038 * the filesystem is in the middle of a forced shutdown. 1634 struct xfs_mount *mp,
1039 */ 1635 struct xfs_trans *tp,
1040STATIC void 1636 xfs_lsn_t *commit_lsn,
1041xfs_trans_uncommit( 1637 int flags)
1042 xfs_trans_t *tp,
1043 uint flags)
1044{ 1638{
1045 xfs_log_item_desc_t *lidp; 1639 struct xfs_log_vec *log_vector;
1640 int error;
1046 1641
1047 for (lidp = xfs_trans_first_item(tp); 1642 /*
1048 lidp != NULL; 1643 * Get each log item to allocate a vector structure for
1049 lidp = xfs_trans_next_item(tp, lidp)) { 1644 * the log item to to pass to the log write code. The
1050 /* 1645 * CIL commit code will format the vector and save it away.
1051 * Unpin all but those that aren't dirty. 1646 */
1052 */ 1647 log_vector = xfs_trans_alloc_log_vecs(tp);
1053 if (lidp->lid_flags & XFS_LID_DIRTY) 1648 if (!log_vector)
1054 IOP_UNPIN_REMOVE(lidp->lid_item, tp); 1649 return ENOMEM;
1055 }
1056 1650
1057 xfs_trans_unreserve_and_mod_sb(tp); 1651 error = xfs_log_commit_cil(mp, tp, log_vector, commit_lsn, flags);
1058 xfs_trans_unreserve_and_mod_dquots(tp); 1652 if (error)
1653 return error;
1059 1654
1060 xfs_trans_free_items(tp, flags); 1655 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
1061 xfs_trans_free_busy(tp); 1656
1657 /* xfs_trans_free_items() unlocks them first */
1658 xfs_trans_free_items(tp, *commit_lsn, 0);
1062 xfs_trans_free(tp); 1659 xfs_trans_free(tp);
1660 return 0;
1063} 1661}
1064 1662
1065/* 1663/*
1066 * Fill in the vector with pointers to data to be logged 1664 * xfs_trans_commit
1067 * by this transaction. The transaction header takes
1068 * the first vector, and then each dirty item takes the
1069 * number of vectors it indicated it needed in xfs_trans_count_vecs().
1070 * 1665 *
1071 * As each item fills in the entries it needs, also pin the item 1666 * Commit the given transaction to the log a/synchronously.
1072 * so that it cannot be flushed out until the log write completes. 1667 *
1668 * XFS disk error handling mechanism is not based on a typical
1669 * transaction abort mechanism. Logically after the filesystem
1670 * gets marked 'SHUTDOWN', we can't let any new transactions
1671 * be durable - ie. committed to disk - because some metadata might
1672 * be inconsistent. In such cases, this returns an error, and the
1673 * caller may assume that all locked objects joined to the transaction
1674 * have already been unlocked as if the commit had succeeded.
1675 * Do not reference the transaction structure after this call.
1073 */ 1676 */
1074STATIC void 1677int
1075xfs_trans_fill_vecs( 1678_xfs_trans_commit(
1076 xfs_trans_t *tp, 1679 struct xfs_trans *tp,
1077 xfs_log_iovec_t *log_vector) 1680 uint flags,
1681 int *log_flushed)
1078{ 1682{
1079 xfs_log_item_desc_t *lidp; 1683 struct xfs_mount *mp = tp->t_mountp;
1080 xfs_log_iovec_t *vecp; 1684 xfs_lsn_t commit_lsn = -1;
1081 uint nitems; 1685 int error = 0;
1686 int log_flags = 0;
1687 int sync = tp->t_flags & XFS_TRANS_SYNC;
1082 1688
1083 /* 1689 /*
1084 * Skip over the entry for the transaction header, we'll 1690 * Determine whether this commit is releasing a permanent
1085 * fill that in at the end. 1691 * log reservation or not.
1086 */ 1692 */
1087 vecp = log_vector + 1; /* pointer arithmetic */ 1693 if (flags & XFS_TRANS_RELEASE_LOG_RES) {
1694 ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
1695 log_flags = XFS_LOG_REL_PERM_RESERV;
1696 }
1088 1697
1089 nitems = 0; 1698 /*
1090 lidp = xfs_trans_first_item(tp); 1699 * If there is nothing to be logged by the transaction,
1091 ASSERT(lidp != NULL); 1700 * then unlock all of the items associated with the
1092 while (lidp != NULL) { 1701 * transaction and free the transaction structure.
1093 /* 1702 * Also make sure to return any reserved blocks to
1094 * Skip items which aren't dirty in this transaction. 1703 * the free pool.
1095 */ 1704 */
1096 if (!(lidp->lid_flags & XFS_LID_DIRTY)) { 1705 if (!(tp->t_flags & XFS_TRANS_DIRTY))
1097 lidp = xfs_trans_next_item(tp, lidp); 1706 goto out_unreserve;
1098 continue; 1707
1099 } 1708 if (XFS_FORCED_SHUTDOWN(mp)) {
1100 /* 1709 error = XFS_ERROR(EIO);
1101 * The item may be marked dirty but not log anything. 1710 goto out_unreserve;
1102 * This can be used to get called when a transaction 1711 }
1103 * is committed. 1712
1104 */ 1713 ASSERT(tp->t_ticket != NULL);
1105 if (lidp->lid_size) { 1714
1106 nitems++; 1715 /*
1716 * If we need to update the superblock, then do it now.
1717 */
1718 if (tp->t_flags & XFS_TRANS_SB_DIRTY)
1719 xfs_trans_apply_sb_deltas(tp);
1720 xfs_trans_apply_dquot_deltas(tp);
1721
1722 if (mp->m_flags & XFS_MOUNT_DELAYLOG)
1723 error = xfs_trans_commit_cil(mp, tp, &commit_lsn, flags);
1724 else
1725 error = xfs_trans_commit_iclog(mp, tp, &commit_lsn, flags);
1726
1727 if (error == ENOMEM) {
1728 xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
1729 error = XFS_ERROR(EIO);
1730 goto out_unreserve;
1731 }
1732
1733 /*
1734 * If the transaction needs to be synchronous, then force the
1735 * log out now and wait for it.
1736 */
1737 if (sync) {
1738 if (!error) {
1739 error = _xfs_log_force_lsn(mp, commit_lsn,
1740 XFS_LOG_SYNC, log_flushed);
1107 } 1741 }
1108 IOP_FORMAT(lidp->lid_item, vecp); 1742 XFS_STATS_INC(xs_trans_sync);
1109 vecp += lidp->lid_size; /* pointer arithmetic */ 1743 } else {
1110 IOP_PIN(lidp->lid_item); 1744 XFS_STATS_INC(xs_trans_async);
1111 lidp = xfs_trans_next_item(tp, lidp);
1112 } 1745 }
1113 1746
1747 return error;
1748
1749out_unreserve:
1750 xfs_trans_unreserve_and_mod_sb(tp);
1751
1114 /* 1752 /*
1115 * Now that we've counted the number of items in this 1753 * It is indeed possible for the transaction to be not dirty but
1116 * transaction, fill in the transaction header. 1754 * the dqinfo portion to be. All that means is that we have some
1755 * (non-persistent) quota reservations that need to be unreserved.
1117 */ 1756 */
1118 tp->t_header.th_magic = XFS_TRANS_HEADER_MAGIC; 1757 xfs_trans_unreserve_and_mod_dquots(tp);
1119 tp->t_header.th_type = tp->t_type; 1758 if (tp->t_ticket) {
1120 tp->t_header.th_num_items = nitems; 1759 commit_lsn = xfs_log_done(mp, tp->t_ticket, NULL, log_flags);
1121 log_vector->i_addr = (xfs_caddr_t)&tp->t_header; 1760 if (commit_lsn == -1 && !error)
1122 log_vector->i_len = sizeof(xfs_trans_header_t); 1761 error = XFS_ERROR(EIO);
1123 log_vector->i_type = XLOG_REG_TYPE_TRANSHDR; 1762 }
1124} 1763 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
1764 xfs_trans_free_items(tp, NULLCOMMITLSN, error ? XFS_TRANS_ABORT : 0);
1765 xfs_trans_free(tp);
1125 1766
1767 XFS_STATS_INC(xs_trans_empty);
1768 return error;
1769}
1126 1770
1127/* 1771/*
1128 * Unlock all of the transaction's items and free the transaction. 1772 * Unlock all of the transaction's items and free the transaction.
@@ -1138,12 +1782,6 @@ xfs_trans_cancel(
1138 int flags) 1782 int flags)
1139{ 1783{
1140 int log_flags; 1784 int log_flags;
1141#ifdef DEBUG
1142 xfs_log_item_chunk_t *licp;
1143 xfs_log_item_desc_t *lidp;
1144 xfs_log_item_t *lip;
1145 int i;
1146#endif
1147 xfs_mount_t *mp = tp->t_mountp; 1785 xfs_mount_t *mp = tp->t_mountp;
1148 1786
1149 /* 1787 /*
@@ -1162,21 +1800,11 @@ xfs_trans_cancel(
1162 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 1800 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
1163 } 1801 }
1164#ifdef DEBUG 1802#ifdef DEBUG
1165 if (!(flags & XFS_TRANS_ABORT)) { 1803 if (!(flags & XFS_TRANS_ABORT) && !XFS_FORCED_SHUTDOWN(mp)) {
1166 licp = &(tp->t_items); 1804 struct xfs_log_item_desc *lidp;
1167 while (licp != NULL) { 1805
1168 lidp = licp->lic_descs; 1806 list_for_each_entry(lidp, &tp->t_items, lid_trans)
1169 for (i = 0; i < licp->lic_unused; i++, lidp++) { 1807 ASSERT(!(lidp->lid_item->li_type == XFS_LI_EFD));
1170 if (xfs_lic_isfree(licp, i)) {
1171 continue;
1172 }
1173
1174 lip = lidp->lid_item;
1175 if (!XFS_FORCED_SHUTDOWN(mp))
1176 ASSERT(!(lip->li_type == XFS_LI_EFD));
1177 }
1178 licp = licp->lic_next;
1179 }
1180 } 1808 }
1181#endif 1809#endif
1182 xfs_trans_unreserve_and_mod_sb(tp); 1810 xfs_trans_unreserve_and_mod_sb(tp);
@@ -1195,25 +1823,10 @@ xfs_trans_cancel(
1195 /* mark this thread as no longer being in a transaction */ 1823 /* mark this thread as no longer being in a transaction */
1196 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); 1824 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
1197 1825
1198 xfs_trans_free_items(tp, flags); 1826 xfs_trans_free_items(tp, NULLCOMMITLSN, flags);
1199 xfs_trans_free_busy(tp);
1200 xfs_trans_free(tp); 1827 xfs_trans_free(tp);
1201} 1828}
1202 1829
1203
1204/*
1205 * Free the transaction structure. If there is more clean up
1206 * to do when the structure is freed, add it here.
1207 */
1208STATIC void
1209xfs_trans_free(
1210 xfs_trans_t *tp)
1211{
1212 atomic_dec(&tp->t_mountp->m_active_trans);
1213 xfs_trans_free_dqinfo(tp);
1214 kmem_zone_free(xfs_trans_zone, tp);
1215}
1216
1217/* 1830/*
1218 * Roll from one trans in the sequence of PERMANENT transactions to 1831 * Roll from one trans in the sequence of PERMANENT transactions to
1219 * the next: permanent transactions are only flushed out when 1832 * the next: permanent transactions are only flushed out when
@@ -1279,178 +1892,6 @@ xfs_trans_roll(
1279 if (error) 1892 if (error)
1280 return error; 1893 return error;
1281 1894
1282 xfs_trans_ijoin(trans, dp, XFS_ILOCK_EXCL); 1895 xfs_trans_ijoin(trans, dp);
1283 xfs_trans_ihold(trans, dp);
1284 return 0; 1896 return 0;
1285} 1897}
1286
1287/*
1288 * THIS SHOULD BE REWRITTEN TO USE xfs_trans_next_item().
1289 *
1290 * This is typically called by the LM when a transaction has been fully
1291 * committed to disk. It needs to unpin the items which have
1292 * been logged by the transaction and update their positions
1293 * in the AIL if necessary.
1294 * This also gets called when the transactions didn't get written out
1295 * because of an I/O error. Abortflag & XFS_LI_ABORTED is set then.
1296 *
1297 * Call xfs_trans_chunk_committed() to process the items in
1298 * each chunk.
1299 */
1300STATIC void
1301xfs_trans_committed(
1302 xfs_trans_t *tp,
1303 int abortflag)
1304{
1305 xfs_log_item_chunk_t *licp;
1306 xfs_log_item_chunk_t *next_licp;
1307 xfs_log_busy_chunk_t *lbcp;
1308 xfs_log_busy_slot_t *lbsp;
1309 int i;
1310
1311 /*
1312 * Call the transaction's completion callback if there
1313 * is one.
1314 */
1315 if (tp->t_callback != NULL) {
1316 tp->t_callback(tp, tp->t_callarg);
1317 }
1318
1319 /*
1320 * Special case the chunk embedded in the transaction.
1321 */
1322 licp = &(tp->t_items);
1323 if (!(xfs_lic_are_all_free(licp))) {
1324 xfs_trans_chunk_committed(licp, tp->t_lsn, abortflag);
1325 }
1326
1327 /*
1328 * Process the items in each chunk in turn.
1329 */
1330 licp = licp->lic_next;
1331 while (licp != NULL) {
1332 ASSERT(!xfs_lic_are_all_free(licp));
1333 xfs_trans_chunk_committed(licp, tp->t_lsn, abortflag);
1334 next_licp = licp->lic_next;
1335 kmem_free(licp);
1336 licp = next_licp;
1337 }
1338
1339 /*
1340 * Clear all the per-AG busy list items listed in this transaction
1341 */
1342 lbcp = &tp->t_busy;
1343 while (lbcp != NULL) {
1344 for (i = 0, lbsp = lbcp->lbc_busy; i < lbcp->lbc_unused; i++, lbsp++) {
1345 if (!XFS_LBC_ISFREE(lbcp, i)) {
1346 xfs_alloc_clear_busy(tp, lbsp->lbc_ag,
1347 lbsp->lbc_idx);
1348 }
1349 }
1350 lbcp = lbcp->lbc_next;
1351 }
1352 xfs_trans_free_busy(tp);
1353
1354 /*
1355 * That's it for the transaction structure. Free it.
1356 */
1357 xfs_trans_free(tp);
1358}
1359
1360/*
1361 * This is called to perform the commit processing for each
1362 * item described by the given chunk.
1363 *
1364 * The commit processing consists of unlocking items which were
1365 * held locked with the SYNC_UNLOCK attribute, calling the committed
1366 * routine of each logged item, updating the item's position in the AIL
1367 * if necessary, and unpinning each item. If the committed routine
1368 * returns -1, then do nothing further with the item because it
1369 * may have been freed.
1370 *
1371 * Since items are unlocked when they are copied to the incore
1372 * log, it is possible for two transactions to be completing
1373 * and manipulating the same item simultaneously. The AIL lock
1374 * will protect the lsn field of each item. The value of this
1375 * field can never go backwards.
1376 *
1377 * We unpin the items after repositioning them in the AIL, because
1378 * otherwise they could be immediately flushed and we'd have to race
1379 * with the flusher trying to pull the item from the AIL as we add it.
1380 */
1381STATIC void
1382xfs_trans_chunk_committed(
1383 xfs_log_item_chunk_t *licp,
1384 xfs_lsn_t lsn,
1385 int aborted)
1386{
1387 xfs_log_item_desc_t *lidp;
1388 xfs_log_item_t *lip;
1389 xfs_lsn_t item_lsn;
1390 int i;
1391
1392 lidp = licp->lic_descs;
1393 for (i = 0; i < licp->lic_unused; i++, lidp++) {
1394 struct xfs_ail *ailp;
1395
1396 if (xfs_lic_isfree(licp, i)) {
1397 continue;
1398 }
1399
1400 lip = lidp->lid_item;
1401 if (aborted)
1402 lip->li_flags |= XFS_LI_ABORTED;
1403
1404 /*
1405 * Send in the ABORTED flag to the COMMITTED routine
1406 * so that it knows whether the transaction was aborted
1407 * or not.
1408 */
1409 item_lsn = IOP_COMMITTED(lip, lsn);
1410
1411 /*
1412 * If the committed routine returns -1, make
1413 * no more references to the item.
1414 */
1415 if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0) {
1416 continue;
1417 }
1418
1419 /*
1420 * If the returned lsn is greater than what it
1421 * contained before, update the location of the
1422 * item in the AIL. If it is not, then do nothing.
1423 * Items can never move backwards in the AIL.
1424 *
1425 * While the new lsn should usually be greater, it
1426 * is possible that a later transaction completing
1427 * simultaneously with an earlier one using the
1428 * same item could complete first with a higher lsn.
1429 * This would cause the earlier transaction to fail
1430 * the test below.
1431 */
1432 ailp = lip->li_ailp;
1433 spin_lock(&ailp->xa_lock);
1434 if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0) {
1435 /*
1436 * This will set the item's lsn to item_lsn
1437 * and update the position of the item in
1438 * the AIL.
1439 *
1440 * xfs_trans_ail_update() drops the AIL lock.
1441 */
1442 xfs_trans_ail_update(ailp, lip, item_lsn);
1443 } else {
1444 spin_unlock(&ailp->xa_lock);
1445 }
1446
1447 /*
1448 * Now that we've repositioned the item in the AIL,
1449 * unpin it so it can be flushed. Pass information
1450 * about buffer stale state down from the log item
1451 * flags, if anyone else stales the buffer we do not
1452 * want to pay any attention to it.
1453 */
1454 IOP_UNPIN(lip, lidp->lid_flags & XFS_LID_BUF_STALE);
1455 }
1456}