aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/ordered-data.c
diff options
context:
space:
mode:
authorChris Mason <chris.mason@oracle.com>2009-09-02 16:53:46 -0400
committerChris Mason <chris.mason@oracle.com>2009-09-11 13:31:07 -0400
commit8b62b72b26bcd72082c4a69d179dd906bcc22200 (patch)
treeceee20dfebe45654cb3a25d8916c195836cdbabf /fs/btrfs/ordered-data.c
parent9655d2982b53fdb38a9e0f2f11315b99b92d66e2 (diff)
Btrfs: Use PagePrivate2 to track pages in the data=ordered code.
Btrfs writes go through delalloc to the data=ordered code. This makes sure that all of the data is on disk before the metadata that references it. The tracking means that we have to make sure each page in an extent is fully written before we add that extent into the on-disk btree. This was done in the past by setting the EXTENT_ORDERED bit for the range of an extent when it was added to the data=ordered code, and then clearing the EXTENT_ORDERED bit in the extent state tree as each page finished IO. One of the reasons we had to do this was because sometimes pages are magically dirtied without page_mkwrite being called. The EXTENT_ORDERED bit is checked at writepage time, and if it isn't there, our page become dirty without going through the proper path. These bit operations make for a number of rbtree searches for each page, and can cause considerable lock contention. This commit switches from the EXTENT_ORDERED bit to use PagePrivate2. As pages go into the ordered code, PagePrivate2 is set on each one. This is a cheap operation because we already have all the pages locked and ready to go. As IO finishes, the PagePrivate2 bit is cleared and the ordered accoutning is updated for each page. At writepage time, if the PagePrivate2 bit is missing, we go into the writepage fixup code to handle improperly dirtied pages. Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/ordered-data.c')
-rw-r--r--fs/btrfs/ordered-data.c29
1 files changed, 15 insertions, 14 deletions
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 7f751e462f0b..4a9c8c4cec25 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -159,8 +159,6 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree,
159 * 159 *
160 * len is the length of the extent 160 * len is the length of the extent
161 * 161 *
162 * This also sets the EXTENT_ORDERED bit on the range in the inode.
163 *
164 * The tree is given a single reference on the ordered extent that was 162 * The tree is given a single reference on the ordered extent that was
165 * inserted. 163 * inserted.
166 */ 164 */
@@ -181,6 +179,7 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
181 entry->start = start; 179 entry->start = start;
182 entry->len = len; 180 entry->len = len;
183 entry->disk_len = disk_len; 181 entry->disk_len = disk_len;
182 entry->bytes_left = len;
184 entry->inode = inode; 183 entry->inode = inode;
185 if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) 184 if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE)
186 set_bit(type, &entry->flags); 185 set_bit(type, &entry->flags);
@@ -195,9 +194,6 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
195 &entry->rb_node); 194 &entry->rb_node);
196 BUG_ON(node); 195 BUG_ON(node);
197 196
198 set_extent_ordered(&BTRFS_I(inode)->io_tree, file_offset,
199 entry_end(entry) - 1, GFP_NOFS);
200
201 spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); 197 spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
202 list_add_tail(&entry->root_extent_list, 198 list_add_tail(&entry->root_extent_list,
203 &BTRFS_I(inode)->root->fs_info->ordered_extents); 199 &BTRFS_I(inode)->root->fs_info->ordered_extents);
@@ -241,13 +237,10 @@ int btrfs_dec_test_ordered_pending(struct inode *inode,
241 struct btrfs_ordered_inode_tree *tree; 237 struct btrfs_ordered_inode_tree *tree;
242 struct rb_node *node; 238 struct rb_node *node;
243 struct btrfs_ordered_extent *entry; 239 struct btrfs_ordered_extent *entry;
244 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
245 int ret; 240 int ret;
246 241
247 tree = &BTRFS_I(inode)->ordered_tree; 242 tree = &BTRFS_I(inode)->ordered_tree;
248 mutex_lock(&tree->mutex); 243 mutex_lock(&tree->mutex);
249 clear_extent_ordered(io_tree, file_offset, file_offset + io_size - 1,
250 GFP_NOFS);
251 node = tree_search(tree, file_offset); 244 node = tree_search(tree, file_offset);
252 if (!node) { 245 if (!node) {
253 ret = 1; 246 ret = 1;
@@ -260,11 +253,16 @@ int btrfs_dec_test_ordered_pending(struct inode *inode,
260 goto out; 253 goto out;
261 } 254 }
262 255
263 ret = test_range_bit(io_tree, entry->file_offset, 256 if (io_size > entry->bytes_left) {
264 entry->file_offset + entry->len - 1, 257 printk(KERN_CRIT "bad ordered accounting left %llu size %llu\n",
265 EXTENT_ORDERED, 0, NULL); 258 (unsigned long long)entry->bytes_left,
266 if (ret == 0) 259 (unsigned long long)io_size);
260 }
261 entry->bytes_left -= io_size;
262 if (entry->bytes_left == 0)
267 ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); 263 ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
264 else
265 ret = 1;
268out: 266out:
269 mutex_unlock(&tree->mutex); 267 mutex_unlock(&tree->mutex);
270 return ret == 0; 268 return ret == 0;
@@ -476,6 +474,7 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
476 u64 orig_end; 474 u64 orig_end;
477 u64 wait_end; 475 u64 wait_end;
478 struct btrfs_ordered_extent *ordered; 476 struct btrfs_ordered_extent *ordered;
477 int found;
479 478
480 if (start + len < start) { 479 if (start + len < start) {
481 orig_end = INT_LIMIT(loff_t); 480 orig_end = INT_LIMIT(loff_t);
@@ -502,6 +501,7 @@ again:
502 orig_end >> PAGE_CACHE_SHIFT); 501 orig_end >> PAGE_CACHE_SHIFT);
503 502
504 end = orig_end; 503 end = orig_end;
504 found = 0;
505 while (1) { 505 while (1) {
506 ordered = btrfs_lookup_first_ordered_extent(inode, end); 506 ordered = btrfs_lookup_first_ordered_extent(inode, end);
507 if (!ordered) 507 if (!ordered)
@@ -514,6 +514,7 @@ again:
514 btrfs_put_ordered_extent(ordered); 514 btrfs_put_ordered_extent(ordered);
515 break; 515 break;
516 } 516 }
517 found++;
517 btrfs_start_ordered_extent(inode, ordered, 1); 518 btrfs_start_ordered_extent(inode, ordered, 1);
518 end = ordered->file_offset; 519 end = ordered->file_offset;
519 btrfs_put_ordered_extent(ordered); 520 btrfs_put_ordered_extent(ordered);
@@ -521,8 +522,8 @@ again:
521 break; 522 break;
522 end--; 523 end--;
523 } 524 }
524 if (test_range_bit(&BTRFS_I(inode)->io_tree, start, orig_end, 525 if (found || test_range_bit(&BTRFS_I(inode)->io_tree, start, orig_end,
525 EXTENT_ORDERED | EXTENT_DELALLOC, 0, NULL)) { 526 EXTENT_DELALLOC, 0, NULL)) {
526 schedule_timeout(1); 527 schedule_timeout(1);
527 goto again; 528 goto again;
528 } 529 }