diff options
author | Chris Mason <chris.mason@oracle.com> | 2008-07-17 12:54:15 -0400 |
---|---|---|
committer | Chris Mason <chris.mason@oracle.com> | 2008-09-25 11:04:04 -0400 |
commit | ba1da2f442ec91a1534afa893f9bef7e33056ace (patch) | |
tree | bc567aa83da6d709d3762f7e0bf9a5fe4d16de11 /fs/btrfs/inode.c | |
parent | f9295749388f82c8d2f485e99c72cd7c7876a99b (diff) |
Btrfs: Don't pin pages in RAM until the entire ordered extent is on disk.
Checksum items are not inserted until the entire ordered extent is on disk,
but individual pages might be clean and available for reclaim long before
the whole extent is on disk.
In order to allow those pages to be freed, we need to be able to search
the list of ordered extents to find the checksum that is going to be inserted
in the tree. This way, if the page needs to be read back in before
the checksums are in the btree, we'll be able to verify the checksum on
the page.
This commit adds the ability to search the pending ordered extents for
a given offset in the file, and changes btrfs_releasepage to allow
ordered pages to be freed.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r-- | fs/btrfs/inode.c | 37 |
1 files changed, 21 insertions, 16 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 0a687326c0b0..293355c92a4f 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -382,7 +382,7 @@ mapit: | |||
382 | return btrfs_map_bio(root, rw, bio, mirror_num, 0); | 382 | return btrfs_map_bio(root, rw, bio, mirror_num, 0); |
383 | } | 383 | } |
384 | 384 | ||
385 | static int add_pending_csums(struct btrfs_trans_handle *trans, | 385 | static noinline int add_pending_csums(struct btrfs_trans_handle *trans, |
386 | struct inode *inode, u64 file_offset, | 386 | struct inode *inode, u64 file_offset, |
387 | struct list_head *list) | 387 | struct list_head *list) |
388 | { | 388 | { |
@@ -390,15 +390,12 @@ static int add_pending_csums(struct btrfs_trans_handle *trans, | |||
390 | struct btrfs_ordered_sum *sum; | 390 | struct btrfs_ordered_sum *sum; |
391 | 391 | ||
392 | btrfs_set_trans_block_group(trans, inode); | 392 | btrfs_set_trans_block_group(trans, inode); |
393 | while(!list_empty(list)) { | 393 | list_for_each(cur, list) { |
394 | cur = list->next; | ||
395 | sum = list_entry(cur, struct btrfs_ordered_sum, list); | 394 | sum = list_entry(cur, struct btrfs_ordered_sum, list); |
396 | mutex_lock(&BTRFS_I(inode)->csum_mutex); | 395 | mutex_lock(&BTRFS_I(inode)->csum_mutex); |
397 | btrfs_csum_file_blocks(trans, BTRFS_I(inode)->root, | 396 | btrfs_csum_file_blocks(trans, BTRFS_I(inode)->root, |
398 | inode, sum); | 397 | inode, sum); |
399 | mutex_unlock(&BTRFS_I(inode)->csum_mutex); | 398 | mutex_unlock(&BTRFS_I(inode)->csum_mutex); |
400 | list_del(&sum->list); | ||
401 | kfree(sum); | ||
402 | } | 399 | } |
403 | return 0; | 400 | return 0; |
404 | } | 401 | } |
@@ -498,9 +495,8 @@ int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, | |||
498 | int ret; | 495 | int ret; |
499 | 496 | ||
500 | ret = btrfs_dec_test_ordered_pending(inode, start, end - start + 1); | 497 | ret = btrfs_dec_test_ordered_pending(inode, start, end - start + 1); |
501 | if (!ret) { | 498 | if (!ret) |
502 | return 0; | 499 | return 0; |
503 | } | ||
504 | 500 | ||
505 | trans = btrfs_join_transaction(root, 1); | 501 | trans = btrfs_join_transaction(root, 1); |
506 | 502 | ||
@@ -571,6 +567,18 @@ int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end) | |||
571 | path = btrfs_alloc_path(); | 567 | path = btrfs_alloc_path(); |
572 | item = btrfs_lookup_csum(NULL, root, path, inode->i_ino, start, 0); | 568 | item = btrfs_lookup_csum(NULL, root, path, inode->i_ino, start, 0); |
573 | if (IS_ERR(item)) { | 569 | if (IS_ERR(item)) { |
570 | /* | ||
571 | * It is possible there is an ordered extent that has | ||
572 | * not yet finished for this range in the file. If so, | ||
573 | * that extent will have a csum cached, and it will insert | ||
574 | * the sum after all the blocks in the extent are fully | ||
575 | * on disk. So, look for an ordered extent and use the | ||
576 | * sum if found. | ||
577 | */ | ||
578 | ret = btrfs_find_ordered_sum(inode, start, &csum); | ||
579 | if (ret == 0) | ||
580 | goto found; | ||
581 | |||
574 | ret = PTR_ERR(item); | 582 | ret = PTR_ERR(item); |
575 | /* a csum that isn't present is a preallocated region. */ | 583 | /* a csum that isn't present is a preallocated region. */ |
576 | if (ret == -ENOENT || ret == -EFBIG) | 584 | if (ret == -ENOENT || ret == -EFBIG) |
@@ -582,6 +590,7 @@ int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end) | |||
582 | } | 590 | } |
583 | read_extent_buffer(path->nodes[0], &csum, (unsigned long)item, | 591 | read_extent_buffer(path->nodes[0], &csum, (unsigned long)item, |
584 | BTRFS_CRC32_SIZE); | 592 | BTRFS_CRC32_SIZE); |
593 | found: | ||
585 | set_state_private(io_tree, start, csum); | 594 | set_state_private(io_tree, start, csum); |
586 | out: | 595 | out: |
587 | if (path) | 596 | if (path) |
@@ -888,7 +897,7 @@ static void fill_inode_item(struct extent_buffer *leaf, | |||
888 | BTRFS_I(inode)->block_group->key.objectid); | 897 | BTRFS_I(inode)->block_group->key.objectid); |
889 | } | 898 | } |
890 | 899 | ||
891 | int btrfs_update_inode(struct btrfs_trans_handle *trans, | 900 | int noinline btrfs_update_inode(struct btrfs_trans_handle *trans, |
892 | struct btrfs_root *root, | 901 | struct btrfs_root *root, |
893 | struct inode *inode) | 902 | struct inode *inode) |
894 | { | 903 | { |
@@ -1567,6 +1576,7 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p) | |||
1567 | inode->i_mapping, GFP_NOFS); | 1576 | inode->i_mapping, GFP_NOFS); |
1568 | extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree, | 1577 | extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree, |
1569 | inode->i_mapping, GFP_NOFS); | 1578 | inode->i_mapping, GFP_NOFS); |
1579 | btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree); | ||
1570 | mutex_init(&BTRFS_I(inode)->csum_mutex); | 1580 | mutex_init(&BTRFS_I(inode)->csum_mutex); |
1571 | return 0; | 1581 | return 0; |
1572 | } | 1582 | } |
@@ -1868,6 +1878,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
1868 | inode->i_mapping, GFP_NOFS); | 1878 | inode->i_mapping, GFP_NOFS); |
1869 | extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree, | 1879 | extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree, |
1870 | inode->i_mapping, GFP_NOFS); | 1880 | inode->i_mapping, GFP_NOFS); |
1881 | btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree); | ||
1871 | mutex_init(&BTRFS_I(inode)->csum_mutex); | 1882 | mutex_init(&BTRFS_I(inode)->csum_mutex); |
1872 | BTRFS_I(inode)->delalloc_bytes = 0; | 1883 | BTRFS_I(inode)->delalloc_bytes = 0; |
1873 | BTRFS_I(inode)->disk_i_size = 0; | 1884 | BTRFS_I(inode)->disk_i_size = 0; |
@@ -2097,6 +2108,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, | |||
2097 | BTRFS_I(inode)->delalloc_bytes = 0; | 2108 | BTRFS_I(inode)->delalloc_bytes = 0; |
2098 | BTRFS_I(inode)->disk_i_size = 0; | 2109 | BTRFS_I(inode)->disk_i_size = 0; |
2099 | BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; | 2110 | BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; |
2111 | btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree); | ||
2100 | } | 2112 | } |
2101 | dir->i_sb->s_dirt = 1; | 2113 | dir->i_sb->s_dirt = 1; |
2102 | btrfs_update_inode_block_group(trans, inode); | 2114 | btrfs_update_inode_block_group(trans, inode); |
@@ -2618,14 +2630,6 @@ static int __btrfs_releasepage(struct page *page, gfp_t gfp_flags) | |||
2618 | 2630 | ||
2619 | static int btrfs_releasepage(struct page *page, gfp_t gfp_flags) | 2631 | static int btrfs_releasepage(struct page *page, gfp_t gfp_flags) |
2620 | { | 2632 | { |
2621 | struct btrfs_ordered_extent *ordered; | ||
2622 | |||
2623 | ordered = btrfs_lookup_ordered_extent(page->mapping->host, | ||
2624 | page_offset(page)); | ||
2625 | if (ordered) { | ||
2626 | btrfs_put_ordered_extent(ordered); | ||
2627 | return 0; | ||
2628 | } | ||
2629 | return __btrfs_releasepage(page, gfp_flags); | 2633 | return __btrfs_releasepage(page, gfp_flags); |
2630 | } | 2634 | } |
2631 | 2635 | ||
@@ -3078,6 +3082,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, | |||
3078 | BTRFS_I(inode)->delalloc_bytes = 0; | 3082 | BTRFS_I(inode)->delalloc_bytes = 0; |
3079 | BTRFS_I(inode)->disk_i_size = 0; | 3083 | BTRFS_I(inode)->disk_i_size = 0; |
3080 | BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; | 3084 | BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; |
3085 | btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree); | ||
3081 | } | 3086 | } |
3082 | dir->i_sb->s_dirt = 1; | 3087 | dir->i_sb->s_dirt = 1; |
3083 | btrfs_update_inode_block_group(trans, inode); | 3088 | btrfs_update_inode_block_group(trans, inode); |