aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Chinner <dgc@sgi.com>2008-03-27 03:00:38 -0400
committerLachlan McIlroy <lachlan@redback.melbourne.sgi.com>2008-04-17 21:42:09 -0400
commit75de2a91c98a6f486f261c1367fe59f5583e15a3 (patch)
tree599d1024b34a6649f8c589100f72adf0f30b40b5
parent535f6b3735db6ef6026537bfe55ae00c3d9cc1ee (diff)
[XFS] Account for inode cluster alignment in all allocations
At ENOSPC, we can get a filesystem shutdown due to cancelling a dirty transaction in xfs_mkdir or xfs_create. This is due to the initial allocation attempt not taking into account inode alignment and hence we can prepare the AGF freelist for allocation when it's not actually possible to do an allocation. This results in inode allocation returning ENOSPC with a dirty transaction, and hence we shut down the filesystem. Because the first allocation is an exact allocation attempt, we must tell the allocator that the alignment does not affect the allocation attempt. i.e. we will accept any extent alignment as long as the extent starts at the block we want. Unfortunately, this means that if the longest free extent is less than the length + alignment necessary for fallback allocation attempts but is long enough to attempt a non-aligned allocation, we will modify the free list. If we then have the exact allocation fail, all other allocation attempts will also fail due to the alignment constraint being taken into account. Hence the initial attempt needs to set the "alignment slop" field so that alignment, while not required, must be taken into account when determining if there is enough space left in the AG to do the allocation. That means if the exact allocation fails, we will not dirty the freelist if there is not enough space available for a subsequent allocation to succeed. Hence we get an ENOSPC error back to userspace without shutting down the filesystem. SGI-PV: 978886 SGI-Modid: xfs-linux-melb:xfs-kern:30699a Signed-off-by: David Chinner <dgc@sgi.com> Signed-off-by: Christoph Hellwig <hch@infradead.org> Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
-rw-r--r--fs/xfs/xfs_ialloc.c44
1 files changed, 29 insertions, 15 deletions
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 5a146cb22980..a64dfbd565a5 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -107,6 +107,16 @@ xfs_ialloc_log_di(
107/* 107/*
108 * Allocation group level functions. 108 * Allocation group level functions.
109 */ 109 */
110static inline int
111xfs_ialloc_cluster_alignment(
112 xfs_alloc_arg_t *args)
113{
114 if (xfs_sb_version_hasalign(&args->mp->m_sb) &&
115 args->mp->m_sb.sb_inoalignmt >=
116 XFS_B_TO_FSBT(args->mp, XFS_INODE_CLUSTER_SIZE(args->mp)))
117 return args->mp->m_sb.sb_inoalignmt;
118 return 1;
119}
110 120
111/* 121/*
112 * Allocate new inodes in the allocation group specified by agbp. 122 * Allocate new inodes in the allocation group specified by agbp.
@@ -167,10 +177,24 @@ xfs_ialloc_ag_alloc(
167 args.mod = args.total = args.wasdel = args.isfl = 177 args.mod = args.total = args.wasdel = args.isfl =
168 args.userdata = args.minalignslop = 0; 178 args.userdata = args.minalignslop = 0;
169 args.prod = 1; 179 args.prod = 1;
170 args.alignment = 1; 180
171 /* 181 /*
172 * Allow space for the inode btree to split. 182 * We need to take into account alignment here to ensure that
183 * we don't modify the free list if we fail to have an exact
184 * block. If we don't have an exact match, and every other
185 * allocation attempt fails, we'll end up cancelling
186 * a dirty transaction and shutting down.
187 *
188 * For an exact allocation, alignment must be 1,
189 * however we need to take cluster alignment into account when
190 * fixing up the freelist. Use the minalignslop field to
191 * indicate that extra blocks might be required for alignment,
192 * but not to use them in the actual exact allocation.
173 */ 193 */
194 args.alignment = 1;
195 args.minalignslop = xfs_ialloc_cluster_alignment(&args) - 1;
196
197 /* Allow space for the inode btree to split. */
174 args.minleft = XFS_IN_MAXLEVELS(args.mp) - 1; 198 args.minleft = XFS_IN_MAXLEVELS(args.mp) - 1;
175 if ((error = xfs_alloc_vextent(&args))) 199 if ((error = xfs_alloc_vextent(&args)))
176 return error; 200 return error;
@@ -191,13 +215,8 @@ xfs_ialloc_ag_alloc(
191 ASSERT(!(args.mp->m_flags & XFS_MOUNT_NOALIGN)); 215 ASSERT(!(args.mp->m_flags & XFS_MOUNT_NOALIGN));
192 args.alignment = args.mp->m_dalign; 216 args.alignment = args.mp->m_dalign;
193 isaligned = 1; 217 isaligned = 1;
194 } else if (xfs_sb_version_hasalign(&args.mp->m_sb) && 218 } else
195 args.mp->m_sb.sb_inoalignmt >= 219 args.alignment = xfs_ialloc_cluster_alignment(&args);
196 XFS_B_TO_FSBT(args.mp,
197 XFS_INODE_CLUSTER_SIZE(args.mp)))
198 args.alignment = args.mp->m_sb.sb_inoalignmt;
199 else
200 args.alignment = 1;
201 /* 220 /*
202 * Need to figure out where to allocate the inode blocks. 221 * Need to figure out where to allocate the inode blocks.
203 * Ideally they should be spaced out through the a.g. 222 * Ideally they should be spaced out through the a.g.
@@ -230,12 +249,7 @@ xfs_ialloc_ag_alloc(
230 args.agbno = be32_to_cpu(agi->agi_root); 249 args.agbno = be32_to_cpu(agi->agi_root);
231 args.fsbno = XFS_AGB_TO_FSB(args.mp, 250 args.fsbno = XFS_AGB_TO_FSB(args.mp,
232 be32_to_cpu(agi->agi_seqno), args.agbno); 251 be32_to_cpu(agi->agi_seqno), args.agbno);
233 if (xfs_sb_version_hasalign(&args.mp->m_sb) && 252 args.alignment = xfs_ialloc_cluster_alignment(&args);
234 args.mp->m_sb.sb_inoalignmt >=
235 XFS_B_TO_FSBT(args.mp, XFS_INODE_CLUSTER_SIZE(args.mp)))
236 args.alignment = args.mp->m_sb.sb_inoalignmt;
237 else
238 args.alignment = 1;
239 if ((error = xfs_alloc_vextent(&args))) 253 if ((error = xfs_alloc_vextent(&args)))
240 return error; 254 return error;
241 } 255 }