diff options
author | Mark Fasheh <mark.fasheh@oracle.com> | 2007-07-03 16:34:11 -0400 |
---|---|---|
committer | Mark Fasheh <mark.fasheh@oracle.com> | 2007-07-10 20:32:08 -0400 |
commit | 063c4561f52a74de686fe0ff2f96f4f54c9fecd2 (patch) | |
tree | 73a202c316df70bdfafa489d70e2863c5c5ea33a | |
parent | 35edec1d52c075975991471d624b33b9336226f2 (diff) |
ocfs2: support for removing file regions
Provide an internal interface for the removal of arbitrary file regions.
ocfs2_remove_inode_range() takes a byte range within a file and will remove
existing extents within that range. Partial clusters will be zeroed so that
any read from within the region will return zeros.
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
-rw-r--r-- | fs/ocfs2/alloc.c | 20 | ||||
-rw-r--r-- | fs/ocfs2/alloc.h | 10 | ||||
-rw-r--r-- | fs/ocfs2/file.c | 242 | ||||
-rw-r--r-- | fs/ocfs2/journal.h | 2 |
4 files changed, 262 insertions, 12 deletions
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index df186d2e8248..f5e11f4fa952 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c | |||
@@ -4373,10 +4373,10 @@ out: | |||
4373 | return ret; | 4373 | return ret; |
4374 | } | 4374 | } |
4375 | 4375 | ||
4376 | static int ocfs2_remove_extent(struct inode *inode, struct buffer_head *di_bh, | 4376 | int ocfs2_remove_extent(struct inode *inode, struct buffer_head *di_bh, |
4377 | u32 cpos, u32 len, handle_t *handle, | 4377 | u32 cpos, u32 len, handle_t *handle, |
4378 | struct ocfs2_alloc_context *meta_ac, | 4378 | struct ocfs2_alloc_context *meta_ac, |
4379 | struct ocfs2_cached_dealloc_ctxt *dealloc) | 4379 | struct ocfs2_cached_dealloc_ctxt *dealloc) |
4380 | { | 4380 | { |
4381 | int ret, index; | 4381 | int ret, index; |
4382 | u32 rec_range, trunc_range; | 4382 | u32 rec_range, trunc_range; |
@@ -4506,7 +4506,7 @@ out: | |||
4506 | return ret; | 4506 | return ret; |
4507 | } | 4507 | } |
4508 | 4508 | ||
4509 | static inline int ocfs2_truncate_log_needs_flush(struct ocfs2_super *osb) | 4509 | int ocfs2_truncate_log_needs_flush(struct ocfs2_super *osb) |
4510 | { | 4510 | { |
4511 | struct buffer_head *tl_bh = osb->osb_tl_bh; | 4511 | struct buffer_head *tl_bh = osb->osb_tl_bh; |
4512 | struct ocfs2_dinode *di; | 4512 | struct ocfs2_dinode *di; |
@@ -4539,10 +4539,10 @@ static int ocfs2_truncate_log_can_coalesce(struct ocfs2_truncate_log *tl, | |||
4539 | return current_tail == new_start; | 4539 | return current_tail == new_start; |
4540 | } | 4540 | } |
4541 | 4541 | ||
4542 | static int ocfs2_truncate_log_append(struct ocfs2_super *osb, | 4542 | int ocfs2_truncate_log_append(struct ocfs2_super *osb, |
4543 | handle_t *handle, | 4543 | handle_t *handle, |
4544 | u64 start_blk, | 4544 | u64 start_blk, |
4545 | unsigned int num_clusters) | 4545 | unsigned int num_clusters) |
4546 | { | 4546 | { |
4547 | int status, index; | 4547 | int status, index; |
4548 | unsigned int start_cluster, tl_count; | 4548 | unsigned int start_cluster, tl_count; |
@@ -4698,7 +4698,7 @@ bail: | |||
4698 | } | 4698 | } |
4699 | 4699 | ||
4700 | /* Expects you to already be holding tl_inode->i_mutex */ | 4700 | /* Expects you to already be holding tl_inode->i_mutex */ |
4701 | static int __ocfs2_flush_truncate_log(struct ocfs2_super *osb) | 4701 | int __ocfs2_flush_truncate_log(struct ocfs2_super *osb) |
4702 | { | 4702 | { |
4703 | int status; | 4703 | int status; |
4704 | unsigned int num_to_flush; | 4704 | unsigned int num_to_flush; |
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h index 752ef860873d..990df48ae8d3 100644 --- a/fs/ocfs2/alloc.h +++ b/fs/ocfs2/alloc.h | |||
@@ -41,6 +41,10 @@ int ocfs2_mark_extent_written(struct inode *inode, struct buffer_head *di_bh, | |||
41 | handle_t *handle, u32 cpos, u32 len, u32 phys, | 41 | handle_t *handle, u32 cpos, u32 len, u32 phys, |
42 | struct ocfs2_alloc_context *meta_ac, | 42 | struct ocfs2_alloc_context *meta_ac, |
43 | struct ocfs2_cached_dealloc_ctxt *dealloc); | 43 | struct ocfs2_cached_dealloc_ctxt *dealloc); |
44 | int ocfs2_remove_extent(struct inode *inode, struct buffer_head *di_bh, | ||
45 | u32 cpos, u32 len, handle_t *handle, | ||
46 | struct ocfs2_alloc_context *meta_ac, | ||
47 | struct ocfs2_cached_dealloc_ctxt *dealloc); | ||
44 | int ocfs2_num_free_extents(struct ocfs2_super *osb, | 48 | int ocfs2_num_free_extents(struct ocfs2_super *osb, |
45 | struct inode *inode, | 49 | struct inode *inode, |
46 | struct ocfs2_dinode *fe); | 50 | struct ocfs2_dinode *fe); |
@@ -68,6 +72,12 @@ int ocfs2_begin_truncate_log_recovery(struct ocfs2_super *osb, | |||
68 | struct ocfs2_dinode **tl_copy); | 72 | struct ocfs2_dinode **tl_copy); |
69 | int ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb, | 73 | int ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb, |
70 | struct ocfs2_dinode *tl_copy); | 74 | struct ocfs2_dinode *tl_copy); |
75 | int ocfs2_truncate_log_needs_flush(struct ocfs2_super *osb); | ||
76 | int ocfs2_truncate_log_append(struct ocfs2_super *osb, | ||
77 | handle_t *handle, | ||
78 | u64 start_blk, | ||
79 | unsigned int num_clusters); | ||
80 | int __ocfs2_flush_truncate_log(struct ocfs2_super *osb); | ||
71 | 81 | ||
72 | /* | 82 | /* |
73 | * Process local structure which describes the block unlinks done | 83 | * Process local structure which describes the block unlinks done |
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index f0a6b1330a6e..11f7cf9f2511 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
@@ -541,12 +541,15 @@ int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di, | |||
541 | struct ocfs2_alloc_context **data_ac, | 541 | struct ocfs2_alloc_context **data_ac, |
542 | struct ocfs2_alloc_context **meta_ac) | 542 | struct ocfs2_alloc_context **meta_ac) |
543 | { | 543 | { |
544 | int ret, num_free_extents; | 544 | int ret = 0, num_free_extents; |
545 | unsigned int max_recs_needed = clusters_to_add + 2 * extents_to_split; | 545 | unsigned int max_recs_needed = clusters_to_add + 2 * extents_to_split; |
546 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 546 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
547 | 547 | ||
548 | *meta_ac = NULL; | 548 | *meta_ac = NULL; |
549 | *data_ac = NULL; | 549 | if (data_ac) |
550 | *data_ac = NULL; | ||
551 | |||
552 | BUG_ON(clusters_to_add != 0 && data_ac == NULL); | ||
550 | 553 | ||
551 | mlog(0, "extend inode %llu, i_size = %lld, di->i_clusters = %u, " | 554 | mlog(0, "extend inode %llu, i_size = %lld, di->i_clusters = %u, " |
552 | "clusters_to_add = %u, extents_to_split = %u\n", | 555 | "clusters_to_add = %u, extents_to_split = %u\n", |
@@ -583,6 +586,9 @@ int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di, | |||
583 | } | 586 | } |
584 | } | 587 | } |
585 | 588 | ||
589 | if (clusters_to_add == 0) | ||
590 | goto out; | ||
591 | |||
586 | ret = ocfs2_reserve_clusters(osb, clusters_to_add, data_ac); | 592 | ret = ocfs2_reserve_clusters(osb, clusters_to_add, data_ac); |
587 | if (ret < 0) { | 593 | if (ret < 0) { |
588 | if (ret != -ENOSPC) | 594 | if (ret != -ENOSPC) |
@@ -1252,6 +1258,238 @@ out: | |||
1252 | return ret; | 1258 | return ret; |
1253 | } | 1259 | } |
1254 | 1260 | ||
1261 | static int __ocfs2_remove_inode_range(struct inode *inode, | ||
1262 | struct buffer_head *di_bh, | ||
1263 | u32 cpos, u32 phys_cpos, u32 len, | ||
1264 | struct ocfs2_cached_dealloc_ctxt *dealloc) | ||
1265 | { | ||
1266 | int ret; | ||
1267 | u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); | ||
1268 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
1269 | struct inode *tl_inode = osb->osb_tl_inode; | ||
1270 | handle_t *handle; | ||
1271 | struct ocfs2_alloc_context *meta_ac = NULL; | ||
1272 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | ||
1273 | |||
1274 | ret = ocfs2_lock_allocators(inode, di, 0, 1, NULL, &meta_ac); | ||
1275 | if (ret) { | ||
1276 | mlog_errno(ret); | ||
1277 | return ret; | ||
1278 | } | ||
1279 | |||
1280 | mutex_lock(&tl_inode->i_mutex); | ||
1281 | |||
1282 | if (ocfs2_truncate_log_needs_flush(osb)) { | ||
1283 | ret = __ocfs2_flush_truncate_log(osb); | ||
1284 | if (ret < 0) { | ||
1285 | mlog_errno(ret); | ||
1286 | goto out; | ||
1287 | } | ||
1288 | } | ||
1289 | |||
1290 | handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS); | ||
1291 | if (handle == NULL) { | ||
1292 | ret = -ENOMEM; | ||
1293 | mlog_errno(ret); | ||
1294 | goto out; | ||
1295 | } | ||
1296 | |||
1297 | ret = ocfs2_journal_access(handle, inode, di_bh, | ||
1298 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
1299 | if (ret) { | ||
1300 | mlog_errno(ret); | ||
1301 | goto out; | ||
1302 | } | ||
1303 | |||
1304 | ret = ocfs2_remove_extent(inode, di_bh, cpos, len, handle, meta_ac, | ||
1305 | dealloc); | ||
1306 | if (ret) { | ||
1307 | mlog_errno(ret); | ||
1308 | goto out_commit; | ||
1309 | } | ||
1310 | |||
1311 | OCFS2_I(inode)->ip_clusters -= len; | ||
1312 | di->i_clusters = cpu_to_le32(OCFS2_I(inode)->ip_clusters); | ||
1313 | |||
1314 | ret = ocfs2_journal_dirty(handle, di_bh); | ||
1315 | if (ret) { | ||
1316 | mlog_errno(ret); | ||
1317 | goto out_commit; | ||
1318 | } | ||
1319 | |||
1320 | ret = ocfs2_truncate_log_append(osb, handle, phys_blkno, len); | ||
1321 | if (ret) | ||
1322 | mlog_errno(ret); | ||
1323 | |||
1324 | out_commit: | ||
1325 | ocfs2_commit_trans(osb, handle); | ||
1326 | out: | ||
1327 | mutex_unlock(&tl_inode->i_mutex); | ||
1328 | |||
1329 | if (meta_ac) | ||
1330 | ocfs2_free_alloc_context(meta_ac); | ||
1331 | |||
1332 | return ret; | ||
1333 | } | ||
1334 | |||
1335 | /* | ||
1336 | * Truncate a byte range, avoiding pages within partial clusters. This | ||
1337 | * preserves those pages for the zeroing code to write to. | ||
1338 | */ | ||
1339 | static void ocfs2_truncate_cluster_pages(struct inode *inode, u64 byte_start, | ||
1340 | u64 byte_len) | ||
1341 | { | ||
1342 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
1343 | loff_t start, end; | ||
1344 | struct address_space *mapping = inode->i_mapping; | ||
1345 | |||
1346 | start = (loff_t)ocfs2_align_bytes_to_clusters(inode->i_sb, byte_start); | ||
1347 | end = byte_start + byte_len; | ||
1348 | end = end & ~(osb->s_clustersize - 1); | ||
1349 | |||
1350 | if (start < end) { | ||
1351 | unmap_mapping_range(mapping, start, end - start, 0); | ||
1352 | truncate_inode_pages_range(mapping, start, end - 1); | ||
1353 | } | ||
1354 | } | ||
1355 | |||
1356 | static int ocfs2_zero_partial_clusters(struct inode *inode, | ||
1357 | u64 start, u64 len) | ||
1358 | { | ||
1359 | int ret = 0; | ||
1360 | u64 tmpend, end = start + len; | ||
1361 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
1362 | unsigned int csize = osb->s_clustersize; | ||
1363 | handle_t *handle; | ||
1364 | |||
1365 | /* | ||
1366 | * The "start" and "end" values are NOT necessarily part of | ||
1367 | * the range whose allocation is being deleted. Rather, this | ||
1368 | * is what the user passed in with the request. We must zero | ||
1369 | * partial clusters here. There's no need to worry about | ||
1370 | * physical allocation - the zeroing code knows to skip holes. | ||
1371 | */ | ||
1372 | mlog(0, "byte start: %llu, end: %llu\n", | ||
1373 | (unsigned long long)start, (unsigned long long)end); | ||
1374 | |||
1375 | /* | ||
1376 | * If both edges are on a cluster boundary then there's no | ||
1377 | * zeroing required as the region is part of the allocation to | ||
1378 | * be truncated. | ||
1379 | */ | ||
1380 | if ((start & (csize - 1)) == 0 && (end & (csize - 1)) == 0) | ||
1381 | goto out; | ||
1382 | |||
1383 | handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); | ||
1384 | if (handle == NULL) { | ||
1385 | ret = -ENOMEM; | ||
1386 | mlog_errno(ret); | ||
1387 | goto out; | ||
1388 | } | ||
1389 | |||
1390 | /* | ||
1391 | * We want to get the byte offset of the end of the 1st cluster. | ||
1392 | */ | ||
1393 | tmpend = (u64)osb->s_clustersize + (start & ~(osb->s_clustersize - 1)); | ||
1394 | if (tmpend > end) | ||
1395 | tmpend = end; | ||
1396 | |||
1397 | mlog(0, "1st range: start: %llu, tmpend: %llu\n", | ||
1398 | (unsigned long long)start, (unsigned long long)tmpend); | ||
1399 | |||
1400 | ret = ocfs2_zero_range_for_truncate(inode, handle, start, tmpend); | ||
1401 | if (ret) | ||
1402 | mlog_errno(ret); | ||
1403 | |||
1404 | if (tmpend < end) { | ||
1405 | /* | ||
1406 | * This may make start and end equal, but the zeroing | ||
1407 | * code will skip any work in that case so there's no | ||
1408 | * need to catch it up here. | ||
1409 | */ | ||
1410 | start = end & ~(osb->s_clustersize - 1); | ||
1411 | |||
1412 | mlog(0, "2nd range: start: %llu, end: %llu\n", | ||
1413 | (unsigned long long)start, (unsigned long long)end); | ||
1414 | |||
1415 | ret = ocfs2_zero_range_for_truncate(inode, handle, start, end); | ||
1416 | if (ret) | ||
1417 | mlog_errno(ret); | ||
1418 | } | ||
1419 | |||
1420 | ocfs2_commit_trans(osb, handle); | ||
1421 | out: | ||
1422 | return ret; | ||
1423 | } | ||
1424 | |||
1425 | static int ocfs2_remove_inode_range(struct inode *inode, | ||
1426 | struct buffer_head *di_bh, u64 byte_start, | ||
1427 | u64 byte_len) | ||
1428 | { | ||
1429 | int ret = 0; | ||
1430 | u32 trunc_start, trunc_len, cpos, phys_cpos, alloc_size; | ||
1431 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
1432 | struct ocfs2_cached_dealloc_ctxt dealloc; | ||
1433 | |||
1434 | ocfs2_init_dealloc_ctxt(&dealloc); | ||
1435 | |||
1436 | if (byte_len == 0) | ||
1437 | return 0; | ||
1438 | |||
1439 | trunc_start = ocfs2_clusters_for_bytes(osb->sb, byte_start); | ||
1440 | trunc_len = (byte_start + byte_len) >> osb->s_clustersize_bits; | ||
1441 | if (trunc_len >= trunc_start) | ||
1442 | trunc_len -= trunc_start; | ||
1443 | else | ||
1444 | trunc_len = 0; | ||
1445 | |||
1446 | mlog(0, "Inode: %llu, start: %llu, len: %llu, cstart: %u, clen: %u\n", | ||
1447 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | ||
1448 | (unsigned long long)byte_start, | ||
1449 | (unsigned long long)byte_len, trunc_start, trunc_len); | ||
1450 | |||
1451 | ret = ocfs2_zero_partial_clusters(inode, byte_start, byte_len); | ||
1452 | if (ret) { | ||
1453 | mlog_errno(ret); | ||
1454 | goto out; | ||
1455 | } | ||
1456 | |||
1457 | cpos = trunc_start; | ||
1458 | while (trunc_len) { | ||
1459 | ret = ocfs2_get_clusters(inode, cpos, &phys_cpos, | ||
1460 | &alloc_size, NULL); | ||
1461 | if (ret) { | ||
1462 | mlog_errno(ret); | ||
1463 | goto out; | ||
1464 | } | ||
1465 | |||
1466 | if (alloc_size > trunc_len) | ||
1467 | alloc_size = trunc_len; | ||
1468 | |||
1469 | /* Only do work for non-holes */ | ||
1470 | if (phys_cpos != 0) { | ||
1471 | ret = __ocfs2_remove_inode_range(inode, di_bh, cpos, | ||
1472 | phys_cpos, alloc_size, | ||
1473 | &dealloc); | ||
1474 | if (ret) { | ||
1475 | mlog_errno(ret); | ||
1476 | goto out; | ||
1477 | } | ||
1478 | } | ||
1479 | |||
1480 | cpos += alloc_size; | ||
1481 | trunc_len -= alloc_size; | ||
1482 | } | ||
1483 | |||
1484 | ocfs2_truncate_cluster_pages(inode, byte_start, byte_len); | ||
1485 | |||
1486 | out: | ||
1487 | ocfs2_schedule_truncate_log_flush(osb, 1); | ||
1488 | ocfs2_run_deallocs(osb, &dealloc); | ||
1489 | |||
1490 | return ret; | ||
1491 | } | ||
1492 | |||
1255 | static int ocfs2_prepare_inode_for_write(struct dentry *dentry, | 1493 | static int ocfs2_prepare_inode_for_write(struct dentry *dentry, |
1256 | loff_t *ppos, | 1494 | loff_t *ppos, |
1257 | size_t count, | 1495 | size_t count, |
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index 3db5de4506da..ce60aab013aa 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h | |||
@@ -289,6 +289,8 @@ int ocfs2_journal_dirty_data(handle_t *handle, | |||
289 | #define OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC (OCFS2_SUBALLOC_FREE \ | 289 | #define OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC (OCFS2_SUBALLOC_FREE \ |
290 | + OCFS2_TRUNCATE_LOG_UPDATE) | 290 | + OCFS2_TRUNCATE_LOG_UPDATE) |
291 | 291 | ||
292 | #define OCFS2_REMOVE_EXTENT_CREDITS (OCFS2_TRUNCATE_LOG_UPDATE + OCFS2_INODE_UPDATE_CREDITS) | ||
293 | |||
292 | /* data block for new dir/symlink, 2 for bitmap updates (bitmap fe + | 294 | /* data block for new dir/symlink, 2 for bitmap updates (bitmap fe + |
293 | * bitmap block for the new bit) */ | 295 | * bitmap block for the new bit) */ |
294 | #define OCFS2_DIR_LINK_ADDITIONAL_CREDITS (1 + 2) | 296 | #define OCFS2_DIR_LINK_ADDITIONAL_CREDITS (1 + 2) |