aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Chris Mason <chris.mason@oracle.com> 2008-07-17 12:54:15 -0400
committer Chris Mason <chris.mason@oracle.com> 2008-09-25 11:04:04 -0400
commit ba1da2f442ec91a1534afa893f9bef7e33056ace (patch)
tree bc567aa83da6d709d3762f7e0bf9a5fe4d16de11
parent f9295749388f82c8d2f485e99c72cd7c7876a99b (diff)
Btrfs: Don't pin pages in ram until the entire ordered extent is on disk.
Checksum items are not inserted until the entire ordered extent is on disk, but individual pages might be clean and available for reclaim long before the whole extent is on disk.

In order to allow those pages to be freed, we need to be able to search the list of ordered extents to find the checksum that is going to be inserted in the tree. This way if the page needs to be read back in before the checksums are in the btree, we'll be able to verify the checksum on the page.

This commit adds the ability to search the pending ordered extents for a given offset in the file, and changes btrfs_releasepage to allow ordered pages to be freed.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
-rw-r--r-- fs/btrfs/file.c | 2
-rw-r--r-- fs/btrfs/inode.c | 37
-rw-r--r-- fs/btrfs/ordered-data.c | 48
-rw-r--r-- fs/btrfs/ordered-data.h | 1
4 files changed, 69 insertions, 19 deletions
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index d6505892cd52..3e4e5c227c0c 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -251,7 +251,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans,
251 end_of_last_block = start_pos + num_bytes - 1; 251 end_of_last_block = start_pos + num_bytes - 1;
252 252
253 lock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS); 253 lock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS);
254 trans = btrfs_join_transaction(root, 1); 254 trans = btrfs_start_transaction(root, 1);
255 if (!trans) { 255 if (!trans) {
256 err = -ENOMEM; 256 err = -ENOMEM;
257 goto out_unlock; 257 goto out_unlock;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 0a687326c0b0..293355c92a4f 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -382,7 +382,7 @@ mapit:
382 return btrfs_map_bio(root, rw, bio, mirror_num, 0); 382 return btrfs_map_bio(root, rw, bio, mirror_num, 0);
383} 383}
384 384
385static int add_pending_csums(struct btrfs_trans_handle *trans, 385static noinline int add_pending_csums(struct btrfs_trans_handle *trans,
386 struct inode *inode, u64 file_offset, 386 struct inode *inode, u64 file_offset,
387 struct list_head *list) 387 struct list_head *list)
388{ 388{
@@ -390,15 +390,12 @@ static int add_pending_csums(struct btrfs_trans_handle *trans,
390 struct btrfs_ordered_sum *sum; 390 struct btrfs_ordered_sum *sum;
391 391
392 btrfs_set_trans_block_group(trans, inode); 392 btrfs_set_trans_block_group(trans, inode);
393 while(!list_empty(list)) { 393 list_for_each(cur, list) {
394 cur = list->next;
395 sum = list_entry(cur, struct btrfs_ordered_sum, list); 394 sum = list_entry(cur, struct btrfs_ordered_sum, list);
396 mutex_lock(&BTRFS_I(inode)->csum_mutex); 395 mutex_lock(&BTRFS_I(inode)->csum_mutex);
397 btrfs_csum_file_blocks(trans, BTRFS_I(inode)->root, 396 btrfs_csum_file_blocks(trans, BTRFS_I(inode)->root,
398 inode, sum); 397 inode, sum);
399 mutex_unlock(&BTRFS_I(inode)->csum_mutex); 398 mutex_unlock(&BTRFS_I(inode)->csum_mutex);
400 list_del(&sum->list);
401 kfree(sum);
402 } 399 }
403 return 0; 400 return 0;
404} 401}
@@ -498,9 +495,8 @@ int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
498 int ret; 495 int ret;
499 496
500 ret = btrfs_dec_test_ordered_pending(inode, start, end - start + 1); 497 ret = btrfs_dec_test_ordered_pending(inode, start, end - start + 1);
501 if (!ret) { 498 if (!ret)
502 return 0; 499 return 0;
503 }
504 500
505 trans = btrfs_join_transaction(root, 1); 501 trans = btrfs_join_transaction(root, 1);
506 502
@@ -571,6 +567,18 @@ int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end)
571 path = btrfs_alloc_path(); 567 path = btrfs_alloc_path();
572 item = btrfs_lookup_csum(NULL, root, path, inode->i_ino, start, 0); 568 item = btrfs_lookup_csum(NULL, root, path, inode->i_ino, start, 0);
573 if (IS_ERR(item)) { 569 if (IS_ERR(item)) {
570 /*
571 * It is possible there is an ordered extent that has
572 * not yet finished for this range in the file. If so,
573 * that extent will have a csum cached, and it will insert
574 * the sum after all the blocks in the extent are fully
575 * on disk. So, look for an ordered extent and use the
576 * sum if found.
577 */
578 ret = btrfs_find_ordered_sum(inode, start, &csum);
579 if (ret == 0)
580 goto found;
581
574 ret = PTR_ERR(item); 582 ret = PTR_ERR(item);
575 /* a csum that isn't present is a preallocated region. */ 583 /* a csum that isn't present is a preallocated region. */
576 if (ret == -ENOENT || ret == -EFBIG) 584 if (ret == -ENOENT || ret == -EFBIG)
@@ -582,6 +590,7 @@ int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end)
582 } 590 }
583 read_extent_buffer(path->nodes[0], &csum, (unsigned long)item, 591 read_extent_buffer(path->nodes[0], &csum, (unsigned long)item,
584 BTRFS_CRC32_SIZE); 592 BTRFS_CRC32_SIZE);
593found:
585 set_state_private(io_tree, start, csum); 594 set_state_private(io_tree, start, csum);
586out: 595out:
587 if (path) 596 if (path)
@@ -888,7 +897,7 @@ static void fill_inode_item(struct extent_buffer *leaf,
888 BTRFS_I(inode)->block_group->key.objectid); 897 BTRFS_I(inode)->block_group->key.objectid);
889} 898}
890 899
891int btrfs_update_inode(struct btrfs_trans_handle *trans, 900int noinline btrfs_update_inode(struct btrfs_trans_handle *trans,
892 struct btrfs_root *root, 901 struct btrfs_root *root,
893 struct inode *inode) 902 struct inode *inode)
894{ 903{
@@ -1567,6 +1576,7 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p)
1567 inode->i_mapping, GFP_NOFS); 1576 inode->i_mapping, GFP_NOFS);
1568 extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree, 1577 extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree,
1569 inode->i_mapping, GFP_NOFS); 1578 inode->i_mapping, GFP_NOFS);
1579 btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree);
1570 mutex_init(&BTRFS_I(inode)->csum_mutex); 1580 mutex_init(&BTRFS_I(inode)->csum_mutex);
1571 return 0; 1581 return 0;
1572} 1582}
@@ -1868,6 +1878,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
1868 inode->i_mapping, GFP_NOFS); 1878 inode->i_mapping, GFP_NOFS);
1869 extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree, 1879 extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree,
1870 inode->i_mapping, GFP_NOFS); 1880 inode->i_mapping, GFP_NOFS);
1881 btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree);
1871 mutex_init(&BTRFS_I(inode)->csum_mutex); 1882 mutex_init(&BTRFS_I(inode)->csum_mutex);
1872 BTRFS_I(inode)->delalloc_bytes = 0; 1883 BTRFS_I(inode)->delalloc_bytes = 0;
1873 BTRFS_I(inode)->disk_i_size = 0; 1884 BTRFS_I(inode)->disk_i_size = 0;
@@ -2097,6 +2108,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
2097 BTRFS_I(inode)->delalloc_bytes = 0; 2108 BTRFS_I(inode)->delalloc_bytes = 0;
2098 BTRFS_I(inode)->disk_i_size = 0; 2109 BTRFS_I(inode)->disk_i_size = 0;
2099 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; 2110 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
2111 btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree);
2100 } 2112 }
2101 dir->i_sb->s_dirt = 1; 2113 dir->i_sb->s_dirt = 1;
2102 btrfs_update_inode_block_group(trans, inode); 2114 btrfs_update_inode_block_group(trans, inode);
@@ -2618,14 +2630,6 @@ static int __btrfs_releasepage(struct page *page, gfp_t gfp_flags)
2618 2630
2619static int btrfs_releasepage(struct page *page, gfp_t gfp_flags) 2631static int btrfs_releasepage(struct page *page, gfp_t gfp_flags)
2620{ 2632{
2621 struct btrfs_ordered_extent *ordered;
2622
2623 ordered = btrfs_lookup_ordered_extent(page->mapping->host,
2624 page_offset(page));
2625 if (ordered) {
2626 btrfs_put_ordered_extent(ordered);
2627 return 0;
2628 }
2629 return __btrfs_releasepage(page, gfp_flags); 2633 return __btrfs_releasepage(page, gfp_flags);
2630} 2634}
2631 2635
@@ -3078,6 +3082,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
3078 BTRFS_I(inode)->delalloc_bytes = 0; 3082 BTRFS_I(inode)->delalloc_bytes = 0;
3079 BTRFS_I(inode)->disk_i_size = 0; 3083 BTRFS_I(inode)->disk_i_size = 0;
3080 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; 3084 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
3085 btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree);
3081 } 3086 }
3082 dir->i_sb->s_dirt = 1; 3087 dir->i_sb->s_dirt = 1;
3083 btrfs_update_inode_block_group(trans, inode); 3088 btrfs_update_inode_block_group(trans, inode);
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index d86a953ae51d..b739e3abebb9 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -245,8 +245,18 @@ out:
245 245
246int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry) 246int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
247{ 247{
248 if (atomic_dec_and_test(&entry->refs)) 248 struct list_head *cur;
249 struct btrfs_ordered_sum *sum;
250
251 if (atomic_dec_and_test(&entry->refs)) {
252 while(!list_empty(&entry->list)) {
253 cur = entry->list.next;
254 sum = list_entry(cur, struct btrfs_ordered_sum, list);
255 list_del(&sum->list);
256 kfree(sum);
257 }
249 kfree(entry); 258 kfree(entry);
259 }
250 return 0; 260 return 0;
251} 261}
252 262
@@ -444,8 +454,9 @@ int btrfs_ordered_update_i_size(struct inode *inode,
444 * if we find an ordered extent then we can't update disk i_size 454 * if we find an ordered extent then we can't update disk i_size
445 * yet 455 * yet
446 */ 456 */
457 node = &ordered->rb_node;
447 while(1) { 458 while(1) {
448 node = rb_prev(&ordered->rb_node); 459 node = rb_prev(node);
449 if (!node) 460 if (!node)
450 break; 461 break;
451 test = rb_entry(node, struct btrfs_ordered_extent, rb_node); 462 test = rb_entry(node, struct btrfs_ordered_extent, rb_node);
@@ -495,3 +506,36 @@ out:
495 mutex_unlock(&tree->mutex); 506 mutex_unlock(&tree->mutex);
496 return 0; 507 return 0;
497} 508}
509
510int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u32 *sum)
511{
512 struct btrfs_ordered_sum *ordered_sum;
513 struct btrfs_sector_sum *sector_sums;
514 struct btrfs_ordered_extent *ordered;
515 struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree;
516 struct list_head *cur;
517 int ret = 1;
518 int index;
519
520 ordered = btrfs_lookup_ordered_extent(inode, offset);
521 if (!ordered)
522 return 1;
523
524 mutex_lock(&tree->mutex);
525 list_for_each_prev(cur, &ordered->list) {
526 ordered_sum = list_entry(cur, struct btrfs_ordered_sum, list);
527 if (offset >= ordered_sum->file_offset &&
528 offset < ordered_sum->file_offset + ordered_sum->len) {
529 index = (offset - ordered_sum->file_offset) /
530 BTRFS_I(inode)->root->sectorsize;;
531 sector_sums = &ordered_sum->sums;
532 *sum = sector_sums[index].sum;
533 ret = 0;
534 goto out;
535 }
536 }
537out:
538 mutex_unlock(&tree->mutex);
539 return ret;
540}
541
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index 40e9126ad954..33f0d9e91b11 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -91,4 +91,5 @@ int btrfs_add_ordered_pending(struct inode *inode,
91 u64 start, u64 len); 91 u64 start, u64 len);
92int btrfs_ordered_update_i_size(struct inode *inode, 92int btrfs_ordered_update_i_size(struct inode *inode,
93 struct btrfs_ordered_extent *ordered); 93 struct btrfs_ordered_extent *ordered);
94int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u32 *sum);
94#endif 95#endif