diff options
| author | Mark Fasheh <mark.fasheh@oracle.com> | 2007-07-03 16:34:11 -0400 |
|---|---|---|
| committer | Mark Fasheh <mark.fasheh@oracle.com> | 2007-07-10 20:32:08 -0400 |
| commit | 063c4561f52a74de686fe0ff2f96f4f54c9fecd2 (patch) | |
| tree | 73a202c316df70bdfafa489d70e2863c5c5ea33a /fs | |
| parent | 35edec1d52c075975991471d624b33b9336226f2 (diff) | |
ocfs2: support for removing file regions
Provide an internal interface for the removal of arbitrary file regions.
ocfs2_remove_inode_range() takes a byte range within a file and will remove
existing extents within that range. Partial clusters will be zeroed so that
any read from within the region will return zeros.
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
Diffstat (limited to 'fs')
| -rw-r--r-- | fs/ocfs2/alloc.c | 20 | ||||
| -rw-r--r-- | fs/ocfs2/alloc.h | 10 | ||||
| -rw-r--r-- | fs/ocfs2/file.c | 242 | ||||
| -rw-r--r-- | fs/ocfs2/journal.h | 2 |
4 files changed, 262 insertions, 12 deletions
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index df186d2e8248..f5e11f4fa952 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
| @@ -4373,10 +4373,10 @@ out: | |||
| 4373 | return ret; | 4373 | return ret; |
| 4374 | } | 4374 | } |
| 4375 | 4375 | ||
| 4376 | static int ocfs2_remove_extent(struct inode *inode, struct buffer_head *di_bh, | 4376 | int ocfs2_remove_extent(struct inode *inode, struct buffer_head *di_bh, |
| 4377 | u32 cpos, u32 len, handle_t *handle, | 4377 | u32 cpos, u32 len, handle_t *handle, |
| 4378 | struct ocfs2_alloc_context *meta_ac, | 4378 | struct ocfs2_alloc_context *meta_ac, |
| 4379 | struct ocfs2_cached_dealloc_ctxt *dealloc) | 4379 | struct ocfs2_cached_dealloc_ctxt *dealloc) |
| 4380 | { | 4380 | { |
| 4381 | int ret, index; | 4381 | int ret, index; |
| 4382 | u32 rec_range, trunc_range; | 4382 | u32 rec_range, trunc_range; |
| @@ -4506,7 +4506,7 @@ out: | |||
| 4506 | return ret; | 4506 | return ret; |
| 4507 | } | 4507 | } |
| 4508 | 4508 | ||
| 4509 | static inline int ocfs2_truncate_log_needs_flush(struct ocfs2_super *osb) | 4509 | int ocfs2_truncate_log_needs_flush(struct ocfs2_super *osb) |
| 4510 | { | 4510 | { |
| 4511 | struct buffer_head *tl_bh = osb->osb_tl_bh; | 4511 | struct buffer_head *tl_bh = osb->osb_tl_bh; |
| 4512 | struct ocfs2_dinode *di; | 4512 | struct ocfs2_dinode *di; |
| @@ -4539,10 +4539,10 @@ static int ocfs2_truncate_log_can_coalesce(struct ocfs2_truncate_log *tl, | |||
| 4539 | return current_tail == new_start; | 4539 | return current_tail == new_start; |
| 4540 | } | 4540 | } |
| 4541 | 4541 | ||
| 4542 | static int ocfs2_truncate_log_append(struct ocfs2_super *osb, | 4542 | int ocfs2_truncate_log_append(struct ocfs2_super *osb, |
| 4543 | handle_t *handle, | 4543 | handle_t *handle, |
| 4544 | u64 start_blk, | 4544 | u64 start_blk, |
| 4545 | unsigned int num_clusters) | 4545 | unsigned int num_clusters) |
| 4546 | { | 4546 | { |
| 4547 | int status, index; | 4547 | int status, index; |
| 4548 | unsigned int start_cluster, tl_count; | 4548 | unsigned int start_cluster, tl_count; |
| @@ -4698,7 +4698,7 @@ bail: | |||
| 4698 | } | 4698 | } |
| 4699 | 4699 | ||
| 4700 | /* Expects you to already be holding tl_inode->i_mutex */ | 4700 | /* Expects you to already be holding tl_inode->i_mutex */ |
| 4701 | static int __ocfs2_flush_truncate_log(struct ocfs2_super *osb) | 4701 | int __ocfs2_flush_truncate_log(struct ocfs2_super *osb) |
| 4702 | { | 4702 | { |
| 4703 | int status; | 4703 | int status; |
| 4704 | unsigned int num_to_flush; | 4704 | unsigned int num_to_flush; |
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index 752ef860873d..990df48ae8d3 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
| @@ -41,6 +41,10 @@ int ocfs2_mark_extent_written(struct inode *inode, struct buffer_head *di_bh, | |||
| 41 | handle_t *handle, u32 cpos, u32 len, u32 phys, | 41 | handle_t *handle, u32 cpos, u32 len, u32 phys, |
| 42 | struct ocfs2_alloc_context *meta_ac, | 42 | struct ocfs2_alloc_context *meta_ac, |
| 43 | struct ocfs2_cached_dealloc_ctxt *dealloc); | 43 | struct ocfs2_cached_dealloc_ctxt *dealloc); |
| 44 | int ocfs2_remove_extent(struct inode *inode, struct buffer_head *di_bh, | ||
| 45 | u32 cpos, u32 len, handle_t *handle, | ||
| 46 | struct ocfs2_alloc_context *meta_ac, | ||
| 47 | struct ocfs2_cached_dealloc_ctxt *dealloc); | ||
| 44 | int ocfs2_num_free_extents(struct ocfs2_super *osb, | 48 | int ocfs2_num_free_extents(struct ocfs2_super *osb, |
| 45 | struct inode *inode, | 49 | struct inode *inode, |
| 46 | struct ocfs2_dinode *fe); | 50 | struct ocfs2_dinode *fe); |
| @@ -68,6 +72,12 @@ int ocfs2_begin_truncate_log_recovery(struct ocfs2_super *osb, | |||
| 68 | struct ocfs2_dinode **tl_copy); | 72 | struct ocfs2_dinode **tl_copy); |
| 69 | int ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb, | 73 | int ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb, |
| 70 | struct ocfs2_dinode *tl_copy); | 74 | struct ocfs2_dinode *tl_copy); |
| 75 | int ocfs2_truncate_log_needs_flush(struct ocfs2_super *osb); | ||
| 76 | int ocfs2_truncate_log_append(struct ocfs2_super *osb, | ||
| 77 | handle_t *handle, | ||
| 78 | u64 start_blk, | ||
| 79 | unsigned int num_clusters); | ||
| 80 | int __ocfs2_flush_truncate_log(struct ocfs2_super *osb); | ||
| 71 | 81 | ||
| 72 | /* | 82 | /* |
| 73 | * Process local structure which describes the block unlinks done | 83 | * Process local structure which describes the block unlinks done |
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index f0a6b1330a6e..11f7cf9f2511 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
| @@ -541,12 +541,15 @@ int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di, | |||
| 541 | struct ocfs2_alloc_context **data_ac, | 541 | struct ocfs2_alloc_context **data_ac, |
| 542 | struct ocfs2_alloc_context **meta_ac) | 542 | struct ocfs2_alloc_context **meta_ac) |
| 543 | { | 543 | { |
| 544 | int ret, num_free_extents; | 544 | int ret = 0, num_free_extents; |
| 545 | unsigned int max_recs_needed = clusters_to_add + 2 * extents_to_split; | 545 | unsigned int max_recs_needed = clusters_to_add + 2 * extents_to_split; |
| 546 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 546 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
| 547 | 547 | ||
| 548 | *meta_ac = NULL; | 548 | *meta_ac = NULL; |
| 549 | *data_ac = NULL; | 549 | if (data_ac) |
| 550 | *data_ac = NULL; | ||
| 551 | |||
| 552 | BUG_ON(clusters_to_add != 0 && data_ac == NULL); | ||
| 550 | 553 | ||
| 551 | mlog(0, "extend inode %llu, i_size = %lld, di->i_clusters = %u, " | 554 | mlog(0, "extend inode %llu, i_size = %lld, di->i_clusters = %u, " |
| 552 | "clusters_to_add = %u, extents_to_split = %u\n", | 555 | "clusters_to_add = %u, extents_to_split = %u\n", |
| @@ -583,6 +586,9 @@ int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di, | |||
| 583 | } | 586 | } |
| 584 | } | 587 | } |
| 585 | 588 | ||
| 589 | if (clusters_to_add == 0) | ||
| 590 | goto out; | ||
| 591 | |||
| 586 | ret = ocfs2_reserve_clusters(osb, clusters_to_add, data_ac); | 592 | ret = ocfs2_reserve_clusters(osb, clusters_to_add, data_ac); |
| 587 | if (ret < 0) { | 593 | if (ret < 0) { |
| 588 | if (ret != -ENOSPC) | 594 | if (ret != -ENOSPC) |
| @@ -1252,6 +1258,238 @@ out: | |||
| 1252 | return ret; | 1258 | return ret; |
| 1253 | } | 1259 | } |
| 1254 | 1260 | ||
| 1261 | static int __ocfs2_remove_inode_range(struct inode *inode, | ||
| 1262 | struct buffer_head *di_bh, | ||
| 1263 | u32 cpos, u32 phys_cpos, u32 len, | ||
| 1264 | struct ocfs2_cached_dealloc_ctxt *dealloc) | ||
| 1265 | { | ||
| 1266 | int ret; | ||
| 1267 | u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); | ||
| 1268 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 1269 | struct inode *tl_inode = osb->osb_tl_inode; | ||
| 1270 | handle_t *handle; | ||
| 1271 | struct ocfs2_alloc_context *meta_ac = NULL; | ||
| 1272 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | ||
| 1273 | |||
| 1274 | ret = ocfs2_lock_allocators(inode, di, 0, 1, NULL, &meta_ac); | ||
| 1275 | if (ret) { | ||
| 1276 | mlog_errno(ret); | ||
| 1277 | return ret; | ||
| 1278 | } | ||
| 1279 | |||
| 1280 | mutex_lock(&tl_inode->i_mutex); | ||
| 1281 | |||
| 1282 | if (ocfs2_truncate_log_needs_flush(osb)) { | ||
| 1283 | ret = __ocfs2_flush_truncate_log(osb); | ||
| 1284 | if (ret < 0) { | ||
| 1285 | mlog_errno(ret); | ||
| 1286 | goto out; | ||
| 1287 | } | ||
| 1288 | } | ||
| 1289 | |||
| 1290 | handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS); | ||
| 1291 | if (handle == NULL) { | ||
| 1292 | ret = -ENOMEM; | ||
| 1293 | mlog_errno(ret); | ||
| 1294 | goto out; | ||
| 1295 | } | ||
| 1296 | |||
| 1297 | ret = ocfs2_journal_access(handle, inode, di_bh, | ||
| 1298 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
| 1299 | if (ret) { | ||
| 1300 | mlog_errno(ret); | ||
| 1301 | goto out; | ||
| 1302 | } | ||
| 1303 | |||
| 1304 | ret = ocfs2_remove_extent(inode, di_bh, cpos, len, handle, meta_ac, | ||
| 1305 | dealloc); | ||
| 1306 | if (ret) { | ||
| 1307 | mlog_errno(ret); | ||
| 1308 | goto out_commit; | ||
| 1309 | } | ||
| 1310 | |||
| 1311 | OCFS2_I(inode)->ip_clusters -= len; | ||
| 1312 | di->i_clusters = cpu_to_le32(OCFS2_I(inode)->ip_clusters); | ||
| 1313 | |||
| 1314 | ret = ocfs2_journal_dirty(handle, di_bh); | ||
| 1315 | if (ret) { | ||
| 1316 | mlog_errno(ret); | ||
| 1317 | goto out_commit; | ||
| 1318 | } | ||
| 1319 | |||
| 1320 | ret = ocfs2_truncate_log_append(osb, handle, phys_blkno, len); | ||
| 1321 | if (ret) | ||
| 1322 | mlog_errno(ret); | ||
| 1323 | |||
| 1324 | out_commit: | ||
| 1325 | ocfs2_commit_trans(osb, handle); | ||
| 1326 | out: | ||
| 1327 | mutex_unlock(&tl_inode->i_mutex); | ||
| 1328 | |||
| 1329 | if (meta_ac) | ||
| 1330 | ocfs2_free_alloc_context(meta_ac); | ||
| 1331 | |||
| 1332 | return ret; | ||
| 1333 | } | ||
| 1334 | |||
| 1335 | /* | ||
| 1336 | * Truncate a byte range, avoiding pages within partial clusters. This | ||
| 1337 | * preserves those pages for the zeroing code to write to. | ||
| 1338 | */ | ||
| 1339 | static void ocfs2_truncate_cluster_pages(struct inode *inode, u64 byte_start, | ||
| 1340 | u64 byte_len) | ||
| 1341 | { | ||
| 1342 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 1343 | loff_t start, end; | ||
| 1344 | struct address_space *mapping = inode->i_mapping; | ||
| 1345 | |||
| 1346 | start = (loff_t)ocfs2_align_bytes_to_clusters(inode->i_sb, byte_start); | ||
| 1347 | end = byte_start + byte_len; | ||
| 1348 | end = end & ~(osb->s_clustersize - 1); | ||
| 1349 | |||
| 1350 | if (start < end) { | ||
| 1351 | unmap_mapping_range(mapping, start, end - start, 0); | ||
| 1352 | truncate_inode_pages_range(mapping, start, end - 1); | ||
| 1353 | } | ||
| 1354 | } | ||
| 1355 | |||
| 1356 | static int ocfs2_zero_partial_clusters(struct inode *inode, | ||
| 1357 | u64 start, u64 len) | ||
| 1358 | { | ||
| 1359 | int ret = 0; | ||
| 1360 | u64 tmpend, end = start + len; | ||
| 1361 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 1362 | unsigned int csize = osb->s_clustersize; | ||
| 1363 | handle_t *handle; | ||
| 1364 | |||
| 1365 | /* | ||
| 1366 | * The "start" and "end" values are NOT necessarily part of | ||
| 1367 | * the range whose allocation is being deleted. Rather, this | ||
| 1368 | * is what the user passed in with the request. We must zero | ||
| 1369 | * partial clusters here. There's no need to worry about | ||
| 1370 | * physical allocation - the zeroing code knows to skip holes. | ||
| 1371 | */ | ||
| 1372 | mlog(0, "byte start: %llu, end: %llu\n", | ||
| 1373 | (unsigned long long)start, (unsigned long long)end); | ||
| 1374 | |||
| 1375 | /* | ||
| 1376 | * If both edges are on a cluster boundary then there's no | ||
| 1377 | * zeroing required as the region is part of the allocation to | ||
| 1378 | * be truncated. | ||
| 1379 | */ | ||
| 1380 | if ((start & (csize - 1)) == 0 && (end & (csize - 1)) == 0) | ||
| 1381 | goto out; | ||
| 1382 | |||
| 1383 | handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); | ||
| 1384 | if (handle == NULL) { | ||
| 1385 | ret = -ENOMEM; | ||
| 1386 | mlog_errno(ret); | ||
| 1387 | goto out; | ||
| 1388 | } | ||
| 1389 | |||
| 1390 | /* | ||
| 1391 | * We want to get the byte offset of the end of the 1st cluster. | ||
| 1392 | */ | ||
| 1393 | tmpend = (u64)osb->s_clustersize + (start & ~(osb->s_clustersize - 1)); | ||
| 1394 | if (tmpend > end) | ||
| 1395 | tmpend = end; | ||
| 1396 | |||
| 1397 | mlog(0, "1st range: start: %llu, tmpend: %llu\n", | ||
| 1398 | (unsigned long long)start, (unsigned long long)tmpend); | ||
| 1399 | |||
| 1400 | ret = ocfs2_zero_range_for_truncate(inode, handle, start, tmpend); | ||
| 1401 | if (ret) | ||
| 1402 | mlog_errno(ret); | ||
| 1403 | |||
| 1404 | if (tmpend < end) { | ||
| 1405 | /* | ||
| 1406 | * This may make start and end equal, but the zeroing | ||
| 1407 | * code will skip any work in that case so there's no | ||
| 1408 | * need to catch it up here. | ||
| 1409 | */ | ||
| 1410 | start = end & ~(osb->s_clustersize - 1); | ||
| 1411 | |||
| 1412 | mlog(0, "2nd range: start: %llu, end: %llu\n", | ||
| 1413 | (unsigned long long)start, (unsigned long long)end); | ||
| 1414 | |||
| 1415 | ret = ocfs2_zero_range_for_truncate(inode, handle, start, end); | ||
| 1416 | if (ret) | ||
| 1417 | mlog_errno(ret); | ||
| 1418 | } | ||
| 1419 | |||
| 1420 | ocfs2_commit_trans(osb, handle); | ||
| 1421 | out: | ||
| 1422 | return ret; | ||
| 1423 | } | ||
| 1424 | |||
| 1425 | static int ocfs2_remove_inode_range(struct inode *inode, | ||
| 1426 | struct buffer_head *di_bh, u64 byte_start, | ||
| 1427 | u64 byte_len) | ||
| 1428 | { | ||
| 1429 | int ret = 0; | ||
| 1430 | u32 trunc_start, trunc_len, cpos, phys_cpos, alloc_size; | ||
| 1431 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 1432 | struct ocfs2_cached_dealloc_ctxt dealloc; | ||
| 1433 | |||
| 1434 | ocfs2_init_dealloc_ctxt(&dealloc); | ||
| 1435 | |||
| 1436 | if (byte_len == 0) | ||
| 1437 | return 0; | ||
| 1438 | |||
| 1439 | trunc_start = ocfs2_clusters_for_bytes(osb->sb, byte_start); | ||
| 1440 | trunc_len = (byte_start + byte_len) >> osb->s_clustersize_bits; | ||
| 1441 | if (trunc_len >= trunc_start) | ||
| 1442 | trunc_len -= trunc_start; | ||
| 1443 | else | ||
| 1444 | trunc_len = 0; | ||
| 1445 | |||
| 1446 | mlog(0, "Inode: %llu, start: %llu, len: %llu, cstart: %u, clen: %u\n", | ||
| 1447 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | ||
| 1448 | (unsigned long long)byte_start, | ||
| 1449 | (unsigned long long)byte_len, trunc_start, trunc_len); | ||
| 1450 | |||
| 1451 | ret = ocfs2_zero_partial_clusters(inode, byte_start, byte_len); | ||
| 1452 | if (ret) { | ||
| 1453 | mlog_errno(ret); | ||
| 1454 | goto out; | ||
| 1455 | } | ||
| 1456 | |||
| 1457 | cpos = trunc_start; | ||
| 1458 | while (trunc_len) { | ||
| 1459 | ret = ocfs2_get_clusters(inode, cpos, &phys_cpos, | ||
| 1460 | &alloc_size, NULL); | ||
| 1461 | if (ret) { | ||
| 1462 | mlog_errno(ret); | ||
| 1463 | goto out; | ||
| 1464 | } | ||
| 1465 | |||
| 1466 | if (alloc_size > trunc_len) | ||
| 1467 | alloc_size = trunc_len; | ||
| 1468 | |||
| 1469 | /* Only do work for non-holes */ | ||
| 1470 | if (phys_cpos != 0) { | ||
| 1471 | ret = __ocfs2_remove_inode_range(inode, di_bh, cpos, | ||
| 1472 | phys_cpos, alloc_size, | ||
| 1473 | &dealloc); | ||
| 1474 | if (ret) { | ||
| 1475 | mlog_errno(ret); | ||
| 1476 | goto out; | ||
| 1477 | } | ||
| 1478 | } | ||
| 1479 | |||
| 1480 | cpos += alloc_size; | ||
| 1481 | trunc_len -= alloc_size; | ||
| 1482 | } | ||
| 1483 | |||
| 1484 | ocfs2_truncate_cluster_pages(inode, byte_start, byte_len); | ||
| 1485 | |||
| 1486 | out: | ||
| 1487 | ocfs2_schedule_truncate_log_flush(osb, 1); | ||
| 1488 | ocfs2_run_deallocs(osb, &dealloc); | ||
| 1489 | |||
| 1490 | return ret; | ||
| 1491 | } | ||
| 1492 | |||
| 1255 | static int ocfs2_prepare_inode_for_write(struct dentry *dentry, | 1493 | static int ocfs2_prepare_inode_for_write(struct dentry *dentry, |
| 1256 | loff_t *ppos, | 1494 | loff_t *ppos, |
| 1257 | size_t count, | 1495 | size_t count, |
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 3db5de4506da..ce60aab013aa 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
| @@ -289,6 +289,8 @@ int ocfs2_journal_dirty_data(handle_t *handle, | |||
| 289 | #define OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC (OCFS2_SUBALLOC_FREE \ | 289 | #define OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC (OCFS2_SUBALLOC_FREE \ |
| 290 | + OCFS2_TRUNCATE_LOG_UPDATE) | 290 | + OCFS2_TRUNCATE_LOG_UPDATE) |
| 291 | 291 | ||
| 292 | #define OCFS2_REMOVE_EXTENT_CREDITS (OCFS2_TRUNCATE_LOG_UPDATE + OCFS2_INODE_UPDATE_CREDITS) | ||
| 293 | |||
| 292 | /* data block for new dir/symlink, 2 for bitmap updates (bitmap fe + | 294 | /* data block for new dir/symlink, 2 for bitmap updates (bitmap fe + |
| 293 | * bitmap block for the new bit) */ | 295 | * bitmap block for the new bit) */ |
| 294 | #define OCFS2_DIR_LINK_ADDITIONAL_CREDITS (1 + 2) | 296 | #define OCFS2_DIR_LINK_ADDITIONAL_CREDITS (1 + 2) |
