aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDave Chinner <dchinner@redhat.com>2010-06-23 21:15:33 -0400
committerDave Chinner <david@fromorbit.com>2010-06-23 21:15:33 -0400
commit7124fe0a5b619d65b739477b3b55a20bf805b06d (patch)
treebe333ebdcc7df735070dbc1441c1d59682d06132
parent7dce11dbac54fce777eea0f5fb25b2694ccd7900 (diff)
xfs: validate untrusted inode numbers during lookup
When we decode a handle or do a bulkstat lookup, we are using an inode number we cannot trust to be valid. If we are deleting inode chunks from disk (default noikeep mode), then we cannot trust the on disk inode buffer for any given inode number to correctly reflect whether the inode has been unlinked, as neither the di_mode nor the generation number may have been updated on disk. This is due to the fact that when we delete an inode chunk, we do not write the clusters back to disk when they are removed - instead we mark them stale to avoid them being written back potentially over the top of something that has been subsequently allocated at that location. The result is that we can have locations on disk that look like they contain valid inodes but in reality do not. Hence we cannot simply convert the inode number to a block number and read the location from disk to determine if the inode is valid or not. As a result, any XFS_IGET_BULKSTAT lookup needs to actually look the inode up in the inode allocation btree to determine if the inode number is valid or not. It should be noted that even on ikeep filesystems, there is the possibility that blocks on disk may look like valid inode clusters, e.g. if there are filesystem images hosted on the filesystem. Hence even for ikeep filesystems we really need to validate that the inode number is valid before issuing the inode buffer read. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
-rw-r--r--fs/xfs/xfs_ialloc.c121
1 files changed, 78 insertions, 43 deletions
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 9d884c127bb9..0c946c8e05da 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -1203,6 +1203,63 @@ error0:
1203 return error; 1203 return error;
1204} 1204}
1205 1205
1206STATIC int
1207xfs_imap_lookup(
1208 struct xfs_mount *mp,
1209 struct xfs_trans *tp,
1210 xfs_agnumber_t agno,
1211 xfs_agino_t agino,
1212 xfs_agblock_t agbno,
1213 xfs_agblock_t *chunk_agbno,
1214 xfs_agblock_t *offset_agbno,
1215 int flags)
1216{
1217 struct xfs_inobt_rec_incore rec;
1218 struct xfs_btree_cur *cur;
1219 struct xfs_buf *agbp;
1220 xfs_agino_t startino;
1221 int error;
1222 int i;
1223
1224 error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
1225 if (error) {
1226 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
1227 "xfs_ialloc_read_agi() returned "
1228 "error %d, agno %d",
1229 error, agno);
1230 return error;
1231 }
1232
1233 /*
1234 * derive and lookup the exact inode record for the given agino. If the
1235 * record cannot be found, then it's an invalid inode number and we
1236 * should abort.
1237 */
1238 cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
1239 startino = agino & ~(XFS_IALLOC_INODES(mp) - 1);
1240 error = xfs_inobt_lookup(cur, startino, XFS_LOOKUP_EQ, &i);
1241 if (!error) {
1242 if (i)
1243 error = xfs_inobt_get_rec(cur, &rec, &i);
1244 if (!error && i == 0)
1245 error = EINVAL;
1246 }
1247
1248 xfs_trans_brelse(tp, agbp);
1249 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
1250 if (error)
1251 return error;
1252
1253 /* for untrusted inodes check it is allocated first */
1254 if ((flags & XFS_IGET_BULKSTAT) &&
1255 (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino)))
1256 return EINVAL;
1257
1258 *chunk_agbno = XFS_AGINO_TO_AGBNO(mp, rec.ir_startino);
1259 *offset_agbno = agbno - *chunk_agbno;
1260 return 0;
1261}
1262
1206/* 1263/*
1207 * Return the location of the inode in imap, for mapping it into a buffer. 1264 * Return the location of the inode in imap, for mapping it into a buffer.
1208 */ 1265 */
@@ -1263,6 +1320,23 @@ xfs_imap(
1263 return XFS_ERROR(EINVAL); 1320 return XFS_ERROR(EINVAL);
1264 } 1321 }
1265 1322
1323 blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_blocklog;
1324
1325 /*
1326 * For bulkstat and handle lookups, we have an untrusted inode number
1327 * that we have to verify is valid. We cannot do this just by reading
1328 * the inode buffer as it may have been unlinked and removed leaving
1329 * inodes in stale state on disk. Hence we have to do a btree lookup
1330 * in all cases where an untrusted inode number is passed.
1331 */
1332 if (flags & XFS_IGET_BULKSTAT) {
1333 error = xfs_imap_lookup(mp, tp, agno, agino, agbno,
1334 &chunk_agbno, &offset_agbno, flags);
1335 if (error)
1336 return error;
1337 goto out_map;
1338 }
1339
1266 /* 1340 /*
1267 * If the inode cluster size is the same as the blocksize or 1341 * If the inode cluster size is the same as the blocksize or
1268 * smaller we get to the buffer by simple arithmetics. 1342 * smaller we get to the buffer by simple arithmetics.
@@ -1277,10 +1351,8 @@ xfs_imap(
1277 return 0; 1351 return 0;
1278 } 1352 }
1279 1353
1280 blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_blocklog;
1281
1282 /* 1354 /*
1283 * If we get a block number passed from bulkstat we can use it to 1355 * If we get a block number passed we can use it to
1284 * find the buffer easily. 1356 * find the buffer easily.
1285 */ 1357 */
1286 if (imap->im_blkno) { 1358 if (imap->im_blkno) {
@@ -1304,50 +1376,13 @@ xfs_imap(
1304 offset_agbno = agbno & mp->m_inoalign_mask; 1376 offset_agbno = agbno & mp->m_inoalign_mask;
1305 chunk_agbno = agbno - offset_agbno; 1377 chunk_agbno = agbno - offset_agbno;
1306 } else { 1378 } else {
1307 xfs_btree_cur_t *cur; /* inode btree cursor */ 1379 error = xfs_imap_lookup(mp, tp, agno, agino, agbno,
1308 xfs_inobt_rec_incore_t chunk_rec; 1380 &chunk_agbno, &offset_agbno, flags);
1309 xfs_buf_t *agbp; /* agi buffer */
1310 int i; /* temp state */
1311
1312 error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
1313 if (error) {
1314 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
1315 "xfs_ialloc_read_agi() returned "
1316 "error %d, agno %d",
1317 error, agno);
1318 return error;
1319 }
1320
1321 cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
1322 error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i);
1323 if (error) {
1324 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
1325 "xfs_inobt_lookup() failed");
1326 goto error0;
1327 }
1328
1329 error = xfs_inobt_get_rec(cur, &chunk_rec, &i);
1330 if (error) {
1331 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
1332 "xfs_inobt_get_rec() failed");
1333 goto error0;
1334 }
1335 if (i == 0) {
1336#ifdef DEBUG
1337 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
1338 "xfs_inobt_get_rec() failed");
1339#endif /* DEBUG */
1340 error = XFS_ERROR(EINVAL);
1341 }
1342 error0:
1343 xfs_trans_brelse(tp, agbp);
1344 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
1345 if (error) 1381 if (error)
1346 return error; 1382 return error;
1347 chunk_agbno = XFS_AGINO_TO_AGBNO(mp, chunk_rec.ir_startino);
1348 offset_agbno = agbno - chunk_agbno;
1349 } 1383 }
1350 1384
1385out_map:
1351 ASSERT(agbno >= chunk_agbno); 1386 ASSERT(agbno >= chunk_agbno);
1352 cluster_agbno = chunk_agbno + 1387 cluster_agbno = chunk_agbno +
1353 ((offset_agbno / blks_per_cluster) * blks_per_cluster); 1388 ((offset_agbno / blks_per_cluster) * blks_per_cluster);