aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/file.c
diff options
context:
space:
mode:
authorChris Mason <chris.mason@oracle.com>2008-07-17 12:53:50 -0400
committerChris Mason <chris.mason@oracle.com>2008-09-25 11:04:04 -0400
commite6dcd2dc9c489108648e2ed543315dd134d50a9a (patch)
treecddf6f588b65e28c5feb8bff89b22d8ff70f8a50 /fs/btrfs/file.c
parent77a41afb7d0dd0f27b6f2f1a5bc701929c7034de (diff)
Btrfs: New data=ordered implementation
The old data=ordered code would force commit to wait until all the data extents from the transaction were fully on disk. This introduced large latencies into the commit and stalled new writers in the transaction for a long time. The new code changes the way data allocations and extents work: * When delayed allocation is filled, data extents are reserved, and the extent bit EXTENT_ORDERED is set on the entire range of the extent. A struct btrfs_ordered_extent is allocated and inserted into a per-inode rbtree to track the pending extents. * As each page is written EXTENT_ORDERED is cleared on the bytes corresponding to that page. * When all of the bytes corresponding to a single struct btrfs_ordered_extent are written, the previously reserved extent is inserted into the FS btree and into the extent allocation trees. The checksums for the file data are also updated. Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/file.c')
-rw-r--r--fs/btrfs/file.c67
1 file changed, 45 insertions, 22 deletions
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 8037792f8789..12e765f7e0d4 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -34,7 +34,6 @@
34#include "disk-io.h" 34#include "disk-io.h"
35#include "transaction.h" 35#include "transaction.h"
36#include "btrfs_inode.h" 36#include "btrfs_inode.h"
37#include "ordered-data.h"
38#include "ioctl.h" 37#include "ioctl.h"
39#include "print-tree.h" 38#include "print-tree.h"
40#include "compat.h" 39#include "compat.h"
@@ -273,7 +272,9 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans,
273 u64 mask = root->sectorsize - 1; 272 u64 mask = root->sectorsize - 1;
274 last_pos_in_file = (isize + mask) & ~mask; 273 last_pos_in_file = (isize + mask) & ~mask;
275 hole_size = (start_pos - last_pos_in_file + mask) & ~mask; 274 hole_size = (start_pos - last_pos_in_file + mask) & ~mask;
276 if (last_pos_in_file < start_pos) { 275 if (hole_size > 0) {
276 btrfs_wait_ordered_range(inode, last_pos_in_file,
277 last_pos_in_file + hole_size);
277 err = btrfs_drop_extents(trans, root, inode, 278 err = btrfs_drop_extents(trans, root, inode,
278 last_pos_in_file, 279 last_pos_in_file,
279 last_pos_in_file + hole_size, 280 last_pos_in_file + hole_size,
@@ -303,19 +304,17 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans,
303 inline_size > root->fs_info->max_inline || 304 inline_size > root->fs_info->max_inline ||
304 (inline_size & (root->sectorsize -1)) == 0 || 305 (inline_size & (root->sectorsize -1)) == 0 ||
305 inline_size >= BTRFS_MAX_INLINE_DATA_SIZE(root)) { 306 inline_size >= BTRFS_MAX_INLINE_DATA_SIZE(root)) {
306 u64 last_end; 307 /* check for reserved extents on each page, we don't want
307 308 * to reset the delalloc bit on things that already have
309 * extents reserved.
310 */
311 set_extent_delalloc(io_tree, start_pos,
312 end_of_last_block, GFP_NOFS);
308 for (i = 0; i < num_pages; i++) { 313 for (i = 0; i < num_pages; i++) {
309 struct page *p = pages[i]; 314 struct page *p = pages[i];
310 SetPageUptodate(p); 315 SetPageUptodate(p);
311 set_page_dirty(p); 316 set_page_dirty(p);
312 } 317 }
313 last_end = (u64)(pages[num_pages -1]->index) <<
314 PAGE_CACHE_SHIFT;
315 last_end += PAGE_CACHE_SIZE - 1;
316 set_extent_delalloc(io_tree, start_pos, end_of_last_block,
317 GFP_NOFS);
318 btrfs_add_ordered_inode(inode);
319 } else { 318 } else {
320 u64 aligned_end; 319 u64 aligned_end;
321 /* step one, delete the existing extents in this range */ 320 /* step one, delete the existing extents in this range */
@@ -350,10 +349,13 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end)
350 struct extent_map *split = NULL; 349 struct extent_map *split = NULL;
351 struct extent_map *split2 = NULL; 350 struct extent_map *split2 = NULL;
352 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 351 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
352 struct extent_map *tmp;
353 u64 len = end - start + 1; 353 u64 len = end - start + 1;
354 u64 next_start;
354 int ret; 355 int ret;
355 int testend = 1; 356 int testend = 1;
356 357
358 WARN_ON(end < start);
357 if (end == (u64)-1) { 359 if (end == (u64)-1) {
358 len = (u64)-1; 360 len = (u64)-1;
359 testend = 0; 361 testend = 0;
@@ -370,6 +372,8 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end)
370 spin_unlock(&em_tree->lock); 372 spin_unlock(&em_tree->lock);
371 break; 373 break;
372 } 374 }
375 tmp = rb_entry(&em->rb_node, struct extent_map, rb_node);
376 next_start = tmp->start;
373 remove_extent_mapping(em_tree, em); 377 remove_extent_mapping(em_tree, em);
374 378
375 if (em->block_start < EXTENT_MAP_LAST_BYTE && 379 if (em->block_start < EXTENT_MAP_LAST_BYTE &&
@@ -778,37 +782,58 @@ static int prepare_pages(struct btrfs_root *root, struct file *file,
778 struct inode *inode = fdentry(file)->d_inode; 782 struct inode *inode = fdentry(file)->d_inode;
779 int err = 0; 783 int err = 0;
780 u64 start_pos; 784 u64 start_pos;
785 u64 last_pos;
781 786
782 start_pos = pos & ~((u64)root->sectorsize - 1); 787 start_pos = pos & ~((u64)root->sectorsize - 1);
788 last_pos = ((u64)index + num_pages) << PAGE_CACHE_SHIFT;
783 789
784 memset(pages, 0, num_pages * sizeof(struct page *)); 790 memset(pages, 0, num_pages * sizeof(struct page *));
785 791again:
786 for (i = 0; i < num_pages; i++) { 792 for (i = 0; i < num_pages; i++) {
787 pages[i] = grab_cache_page(inode->i_mapping, index + i); 793 pages[i] = grab_cache_page(inode->i_mapping, index + i);
788 if (!pages[i]) { 794 if (!pages[i]) {
789 err = -ENOMEM; 795 err = -ENOMEM;
790 BUG_ON(1); 796 BUG_ON(1);
791 } 797 }
792#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
793 ClearPageDirty(pages[i]);
794#else
795 cancel_dirty_page(pages[i], PAGE_CACHE_SIZE);
796#endif
797 wait_on_page_writeback(pages[i]); 798 wait_on_page_writeback(pages[i]);
798 set_page_extent_mapped(pages[i]);
799 WARN_ON(!PageLocked(pages[i]));
800 } 799 }
801 if (start_pos < inode->i_size) { 800 if (start_pos < inode->i_size) {
802 u64 last_pos; 801 struct btrfs_ordered_extent *ordered;
803 last_pos = ((u64)index + num_pages) << PAGE_CACHE_SHIFT;
804 lock_extent(&BTRFS_I(inode)->io_tree, 802 lock_extent(&BTRFS_I(inode)->io_tree,
805 start_pos, last_pos - 1, GFP_NOFS); 803 start_pos, last_pos - 1, GFP_NOFS);
804 ordered = btrfs_lookup_first_ordered_extent(inode, last_pos -1);
805 if (ordered &&
806 ordered->file_offset + ordered->len > start_pos &&
807 ordered->file_offset < last_pos) {
808 btrfs_put_ordered_extent(ordered);
809 unlock_extent(&BTRFS_I(inode)->io_tree,
810 start_pos, last_pos - 1, GFP_NOFS);
811 for (i = 0; i < num_pages; i++) {
812 unlock_page(pages[i]);
813 page_cache_release(pages[i]);
814 }
815 btrfs_wait_ordered_range(inode, start_pos,
816 last_pos - start_pos);
817 goto again;
818 }
819 if (ordered)
820 btrfs_put_ordered_extent(ordered);
821
806 clear_extent_bits(&BTRFS_I(inode)->io_tree, start_pos, 822 clear_extent_bits(&BTRFS_I(inode)->io_tree, start_pos,
807 last_pos - 1, EXTENT_DIRTY | EXTENT_DELALLOC, 823 last_pos - 1, EXTENT_DIRTY | EXTENT_DELALLOC,
808 GFP_NOFS); 824 GFP_NOFS);
809 unlock_extent(&BTRFS_I(inode)->io_tree, 825 unlock_extent(&BTRFS_I(inode)->io_tree,
810 start_pos, last_pos - 1, GFP_NOFS); 826 start_pos, last_pos - 1, GFP_NOFS);
811 } 827 }
828 for (i = 0; i < num_pages; i++) {
829#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
830 ClearPageDirty(pages[i]);
831#else
832 cancel_dirty_page(pages[i], PAGE_CACHE_SIZE);
833#endif
834 set_page_extent_mapped(pages[i]);
835 WARN_ON(!PageLocked(pages[i]));
836 }
812 return 0; 837 return 0;
813} 838}
814 839
@@ -969,13 +994,11 @@ out_nolock:
969 (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT); 994 (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT);
970 } 995 }
971 current->backing_dev_info = NULL; 996 current->backing_dev_info = NULL;
972 btrfs_ordered_throttle(root, inode);
973 return num_written ? num_written : err; 997 return num_written ? num_written : err;
974} 998}
975 999
976int btrfs_release_file(struct inode * inode, struct file * filp) 1000int btrfs_release_file(struct inode * inode, struct file * filp)
977{ 1001{
978 btrfs_del_ordered_inode(inode, 0);
979 if (filp->private_data) 1002 if (filp->private_data)
980 btrfs_ioctl_trans_end(filp); 1003 btrfs_ioctl_trans_end(filp);
981 return 0; 1004 return 0;