aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Josef Bacik <jbacik@fusionio.com> 2012-10-12 15:27:49 -0400
committer Josef Bacik <jbacik@fusionio.com> 2013-02-20 09:37:04 -0500
commit 2ab28f322f9896782da904f5942f3873432addc8 (patch)
tree d8d136d90b96f96d63262f8d2eb11680bed80aab
parent dfd79829b709af3c2ac55951353a874ae89f41c3 (diff)
Btrfs: wait on ordered extents at the last possible moment
Since we don't actually copy the extent information from the source tree in the fast case, we don't need to wait for ordered io to be completed in order to fsync; we just need to wait for the io to be completed. So when we're logging our file, just attach all of the ordered extents to the log, and then when the log syncs, just wait for IO_DONE on the ordered extents and then write the super. Thanks,

Signed-off-by: Josef Bacik <jbacik@fusionio.com>
-rw-r--r-- fs/btrfs/ctree.h | 3
-rw-r--r-- fs/btrfs/disk-io.c | 4
-rw-r--r-- fs/btrfs/file.c | 30
-rw-r--r-- fs/btrfs/inode.c | 8
-rw-r--r-- fs/btrfs/ordered-data.c | 68
-rw-r--r-- fs/btrfs/ordered-data.h | 11
-rw-r--r-- fs/btrfs/tree-log.c | 132
7 files changed, 247 insertions, 9 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 547b7b05727f..411c8d97074e 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1623,6 +1623,9 @@ struct btrfs_root {
1623 1623
1624 struct list_head root_list; 1624 struct list_head root_list;
1625 1625
1626 spinlock_t log_extents_lock[2];
1627 struct list_head logged_list[2];
1628
1626 spinlock_t orphan_lock; 1629 spinlock_t orphan_lock;
1627 atomic_t orphan_inodes; 1630 atomic_t orphan_inodes;
1628 struct btrfs_block_rsv *orphan_block_rsv; 1631 struct btrfs_block_rsv *orphan_block_rsv;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index a762f9137610..1db8a9938829 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1178,9 +1178,13 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
1178 1178
1179 INIT_LIST_HEAD(&root->dirty_list); 1179 INIT_LIST_HEAD(&root->dirty_list);
1180 INIT_LIST_HEAD(&root->root_list); 1180 INIT_LIST_HEAD(&root->root_list);
1181 INIT_LIST_HEAD(&root->logged_list[0]);
1182 INIT_LIST_HEAD(&root->logged_list[1]);
1181 spin_lock_init(&root->orphan_lock); 1183 spin_lock_init(&root->orphan_lock);
1182 spin_lock_init(&root->inode_lock); 1184 spin_lock_init(&root->inode_lock);
1183 spin_lock_init(&root->accounting_lock); 1185 spin_lock_init(&root->accounting_lock);
1186 spin_lock_init(&root->log_extents_lock[0]);
1187 spin_lock_init(&root->log_extents_lock[1]);
1184 mutex_init(&root->objectid_mutex); 1188 mutex_init(&root->objectid_mutex);
1185 mutex_init(&root->log_mutex); 1189 mutex_init(&root->log_mutex);
1186 init_waitqueue_head(&root->log_writer_wait); 1190 init_waitqueue_head(&root->log_writer_wait);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index b06d289f998f..083abca56055 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1655,16 +1655,21 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
1655 struct btrfs_root *root = BTRFS_I(inode)->root; 1655 struct btrfs_root *root = BTRFS_I(inode)->root;
1656 int ret = 0; 1656 int ret = 0;
1657 struct btrfs_trans_handle *trans; 1657 struct btrfs_trans_handle *trans;
1658 bool full_sync = 0;
1658 1659
1659 trace_btrfs_sync_file(file, datasync); 1660 trace_btrfs_sync_file(file, datasync);
1660 1661
1661 /* 1662 /*
1662 * We write the dirty pages in the range and wait until they complete 1663 * We write the dirty pages in the range and wait until they complete
1663 * out of the ->i_mutex. If so, we can flush the dirty pages by 1664 * out of the ->i_mutex. If so, we can flush the dirty pages by
1664 * multi-task, and make the performance up. 1665 * multi-task, and make the performance up. See
1666 * btrfs_wait_ordered_range for an explanation of the ASYNC check.
1665 */ 1667 */
1666 atomic_inc(&BTRFS_I(inode)->sync_writers); 1668 atomic_inc(&BTRFS_I(inode)->sync_writers);
1667 ret = filemap_write_and_wait_range(inode->i_mapping, start, end); 1669 ret = filemap_fdatawrite_range(inode->i_mapping, start, end);
1670 if (!ret && test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
1671 &BTRFS_I(inode)->runtime_flags))
1672 ret = filemap_fdatawrite_range(inode->i_mapping, start, end);
1668 atomic_dec(&BTRFS_I(inode)->sync_writers); 1673 atomic_dec(&BTRFS_I(inode)->sync_writers);
1669 if (ret) 1674 if (ret)
1670 return ret; 1675 return ret;
@@ -1676,7 +1681,10 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
1676 * range being left. 1681 * range being left.
1677 */ 1682 */
1678 atomic_inc(&root->log_batch); 1683 atomic_inc(&root->log_batch);
1679 btrfs_wait_ordered_range(inode, start, end - start + 1); 1684 full_sync = test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
1685 &BTRFS_I(inode)->runtime_flags);
1686 if (full_sync)
1687 btrfs_wait_ordered_range(inode, start, end - start + 1);
1680 atomic_inc(&root->log_batch); 1688 atomic_inc(&root->log_batch);
1681 1689
1682 /* 1690 /*
@@ -1743,13 +1751,25 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
1743 1751
1744 if (ret != BTRFS_NO_LOG_SYNC) { 1752 if (ret != BTRFS_NO_LOG_SYNC) {
1745 if (ret > 0) { 1753 if (ret > 0) {
1754 /*
1755 * If we didn't already wait for ordered extents we need
1756 * to do that now.
1757 */
1758 if (!full_sync)
1759 btrfs_wait_ordered_range(inode, start,
1760 end - start + 1);
1746 ret = btrfs_commit_transaction(trans, root); 1761 ret = btrfs_commit_transaction(trans, root);
1747 } else { 1762 } else {
1748 ret = btrfs_sync_log(trans, root); 1763 ret = btrfs_sync_log(trans, root);
1749 if (ret == 0) 1764 if (ret == 0) {
1750 ret = btrfs_end_transaction(trans, root); 1765 ret = btrfs_end_transaction(trans, root);
1751 else 1766 } else {
1767 if (!full_sync)
1768 btrfs_wait_ordered_range(inode, start,
1769 end -
1770 start + 1);
1752 ret = btrfs_commit_transaction(trans, root); 1771 ret = btrfs_commit_transaction(trans, root);
1772 }
1753 } 1773 }
1754 } else { 1774 } else {
1755 ret = btrfs_end_transaction(trans, root); 1775 ret = btrfs_end_transaction(trans, root);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 35d152444932..31a871ec48f2 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -700,6 +700,8 @@ retry:
700 em->start = async_extent->start; 700 em->start = async_extent->start;
701 em->len = async_extent->ram_size; 701 em->len = async_extent->ram_size;
702 em->orig_start = em->start; 702 em->orig_start = em->start;
703 em->mod_start = em->start;
704 em->mod_len = em->len;
703 705
704 em->block_start = ins.objectid; 706 em->block_start = ins.objectid;
705 em->block_len = ins.offset; 707 em->block_len = ins.offset;
@@ -892,6 +894,8 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans,
892 em->orig_start = em->start; 894 em->orig_start = em->start;
893 ram_size = ins.offset; 895 ram_size = ins.offset;
894 em->len = ins.offset; 896 em->len = ins.offset;
897 em->mod_start = em->start;
898 em->mod_len = em->len;
895 899
896 em->block_start = ins.objectid; 900 em->block_start = ins.objectid;
897 em->block_len = ins.offset; 901 em->block_len = ins.offset;
@@ -1338,6 +1342,8 @@ out_check:
1338 em->block_start = disk_bytenr; 1342 em->block_start = disk_bytenr;
1339 em->orig_block_len = disk_num_bytes; 1343 em->orig_block_len = disk_num_bytes;
1340 em->bdev = root->fs_info->fs_devices->latest_bdev; 1344 em->bdev = root->fs_info->fs_devices->latest_bdev;
1345 em->mod_start = em->start;
1346 em->mod_len = em->len;
1341 set_bit(EXTENT_FLAG_PINNED, &em->flags); 1347 set_bit(EXTENT_FLAG_PINNED, &em->flags);
1342 set_bit(EXTENT_FLAG_FILLING, &em->flags); 1348 set_bit(EXTENT_FLAG_FILLING, &em->flags);
1343 em->generation = -1; 1349 em->generation = -1;
@@ -5966,6 +5972,8 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
5966 5972
5967 em->start = start; 5973 em->start = start;
5968 em->orig_start = orig_start; 5974 em->orig_start = orig_start;
5975 em->mod_start = start;
5976 em->mod_len = len;
5969 em->len = len; 5977 em->len = len;
5970 em->block_len = block_len; 5978 em->block_len = block_len;
5971 em->block_start = block_start; 5979 em->block_start = block_start;
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index e5ed56729607..f14b17432117 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -196,6 +196,9 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
196 entry->file_offset = file_offset; 196 entry->file_offset = file_offset;
197 entry->start = start; 197 entry->start = start;
198 entry->len = len; 198 entry->len = len;
199 if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) &&
200 !(type == BTRFS_ORDERED_NOCOW))
201 entry->csum_bytes_left = disk_len;
199 entry->disk_len = disk_len; 202 entry->disk_len = disk_len;
200 entry->bytes_left = len; 203 entry->bytes_left = len;
201 entry->inode = igrab(inode); 204 entry->inode = igrab(inode);
@@ -213,6 +216,7 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
213 INIT_LIST_HEAD(&entry->root_extent_list); 216 INIT_LIST_HEAD(&entry->root_extent_list);
214 INIT_LIST_HEAD(&entry->work_list); 217 INIT_LIST_HEAD(&entry->work_list);
215 init_completion(&entry->completion); 218 init_completion(&entry->completion);
219 INIT_LIST_HEAD(&entry->log_list);
216 220
217 trace_btrfs_ordered_extent_add(inode, entry); 221 trace_btrfs_ordered_extent_add(inode, entry);
218 222
@@ -270,6 +274,10 @@ void btrfs_add_ordered_sum(struct inode *inode,
270 tree = &BTRFS_I(inode)->ordered_tree; 274 tree = &BTRFS_I(inode)->ordered_tree;
271 spin_lock_irq(&tree->lock); 275 spin_lock_irq(&tree->lock);
272 list_add_tail(&sum->list, &entry->list); 276 list_add_tail(&sum->list, &entry->list);
277 WARN_ON(entry->csum_bytes_left < sum->len);
278 entry->csum_bytes_left -= sum->len;
279 if (entry->csum_bytes_left == 0)
280 wake_up(&entry->wait);
273 spin_unlock_irq(&tree->lock); 281 spin_unlock_irq(&tree->lock);
274} 282}
275 283
@@ -405,6 +413,66 @@ out:
405 return ret == 0; 413 return ret == 0;
406} 414}
407 415
416/* Needs to either be called under a log transaction or the log_mutex */
417void btrfs_get_logged_extents(struct btrfs_root *log, struct inode *inode)
418{
419 struct btrfs_ordered_inode_tree *tree;
420 struct btrfs_ordered_extent *ordered;
421 struct rb_node *n;
422 int index = log->log_transid % 2;
423
424 tree = &BTRFS_I(inode)->ordered_tree;
425 spin_lock_irq(&tree->lock);
426 for (n = rb_first(&tree->tree); n; n = rb_next(n)) {
427 ordered = rb_entry(n, struct btrfs_ordered_extent, rb_node);
428 spin_lock(&log->log_extents_lock[index]);
429 if (list_empty(&ordered->log_list)) {
430 list_add_tail(&ordered->log_list, &log->logged_list[index]);
431 atomic_inc(&ordered->refs);
432 }
433 spin_unlock(&log->log_extents_lock[index]);
434 }
435 spin_unlock_irq(&tree->lock);
436}
437
438void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid)
439{
440 struct btrfs_ordered_extent *ordered;
441 int index = transid % 2;
442
443 spin_lock_irq(&log->log_extents_lock[index]);
444 while (!list_empty(&log->logged_list[index])) {
445 ordered = list_first_entry(&log->logged_list[index],
446 struct btrfs_ordered_extent,
447 log_list);
448 list_del_init(&ordered->log_list);
449 spin_unlock_irq(&log->log_extents_lock[index]);
450 wait_event(ordered->wait, test_bit(BTRFS_ORDERED_IO_DONE,
451 &ordered->flags));
452 btrfs_put_ordered_extent(ordered);
453 spin_lock_irq(&log->log_extents_lock[index]);
454 }
455 spin_unlock_irq(&log->log_extents_lock[index]);
456}
457
458void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid)
459{
460 struct btrfs_ordered_extent *ordered;
461 int index = transid % 2;
462
463 spin_lock_irq(&log->log_extents_lock[index]);
464 while (!list_empty(&log->logged_list[index])) {
465 ordered = list_first_entry(&log->logged_list[index],
466 struct btrfs_ordered_extent,
467 log_list);
468 list_del_init(&ordered->log_list);
469 spin_unlock_irq(&log->log_extents_lock[index]);
470 btrfs_put_ordered_extent(ordered);
471 spin_lock_irq(&log->log_extents_lock[index]);
472 }
473 spin_unlock_irq(&log->log_extents_lock[index]);
474}
475
408/* 476/*
409 * used to drop a reference on an ordered extent. This will free 477 * used to drop a reference on an ordered extent. This will free
410 * the extent if the last reference is dropped 478 * the extent if the last reference is dropped
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index efc7c2930c17..d523dbd2314d 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -79,6 +79,8 @@ struct btrfs_ordered_sum {
79#define BTRFS_ORDERED_UPDATED_ISIZE 7 /* indicates whether this ordered extent 79#define BTRFS_ORDERED_UPDATED_ISIZE 7 /* indicates whether this ordered extent
80 * has done its due diligence in updating 80 * has done its due diligence in updating
81 * the isize. */ 81 * the isize. */
82#define BTRFS_ORDERED_LOGGED_CSUM 8 /* We've logged the csums on this ordered
83 extent */
82 84
83struct btrfs_ordered_extent { 85struct btrfs_ordered_extent {
84 /* logical offset in the file */ 86 /* logical offset in the file */
@@ -96,6 +98,9 @@ struct btrfs_ordered_extent {
96 /* number of bytes that still need writing */ 98 /* number of bytes that still need writing */
97 u64 bytes_left; 99 u64 bytes_left;
98 100
101 /* number of bytes that still need csumming */
102 u64 csum_bytes_left;
103
99 /* 104 /*
100 * the end of the ordered extent which is behind it but 105 * the end of the ordered extent which is behind it but
101 * didn't update disk_i_size. Please see the comment of 106 * didn't update disk_i_size. Please see the comment of
@@ -118,6 +123,9 @@ struct btrfs_ordered_extent {
118 /* list of checksums for insertion when the extent io is done */ 123 /* list of checksums for insertion when the extent io is done */
119 struct list_head list; 124 struct list_head list;
120 125
126 /* If we need to wait on this to be done */
127 struct list_head log_list;
128
121 /* used to wait for the BTRFS_ORDERED_COMPLETE bit */ 129 /* used to wait for the BTRFS_ORDERED_COMPLETE bit */
122 wait_queue_head_t wait; 130 wait_queue_head_t wait;
123 131
@@ -194,6 +202,9 @@ void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
194 struct btrfs_root *root, 202 struct btrfs_root *root,
195 struct inode *inode); 203 struct inode *inode);
196void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput); 204void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput);
205void btrfs_get_logged_extents(struct btrfs_root *log, struct inode *inode);
206void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid);
207void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid);
197int __init ordered_data_init(void); 208int __init ordered_data_init(void);
198void ordered_data_exit(void); 209void ordered_data_exit(void);
199#endif 210#endif
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 9027bb1e7466..7de720d22b74 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -2281,6 +2281,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2281 unsigned long log_transid = 0; 2281 unsigned long log_transid = 0;
2282 2282
2283 mutex_lock(&root->log_mutex); 2283 mutex_lock(&root->log_mutex);
2284 log_transid = root->log_transid;
2284 index1 = root->log_transid % 2; 2285 index1 = root->log_transid % 2;
2285 if (atomic_read(&root->log_commit[index1])) { 2286 if (atomic_read(&root->log_commit[index1])) {
2286 wait_log_commit(trans, root, root->log_transid); 2287 wait_log_commit(trans, root, root->log_transid);
@@ -2308,11 +2309,11 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2308 /* bail out if we need to do a full commit */ 2309 /* bail out if we need to do a full commit */
2309 if (root->fs_info->last_trans_log_full_commit == trans->transid) { 2310 if (root->fs_info->last_trans_log_full_commit == trans->transid) {
2310 ret = -EAGAIN; 2311 ret = -EAGAIN;
2312 btrfs_free_logged_extents(log, log_transid);
2311 mutex_unlock(&root->log_mutex); 2313 mutex_unlock(&root->log_mutex);
2312 goto out; 2314 goto out;
2313 } 2315 }
2314 2316
2315 log_transid = root->log_transid;
2316 if (log_transid % 2 == 0) 2317 if (log_transid % 2 == 0)
2317 mark = EXTENT_DIRTY; 2318 mark = EXTENT_DIRTY;
2318 else 2319 else
@@ -2324,6 +2325,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2324 ret = btrfs_write_marked_extents(log, &log->dirty_log_pages, mark); 2325 ret = btrfs_write_marked_extents(log, &log->dirty_log_pages, mark);
2325 if (ret) { 2326 if (ret) {
2326 btrfs_abort_transaction(trans, root, ret); 2327 btrfs_abort_transaction(trans, root, ret);
2328 btrfs_free_logged_extents(log, log_transid);
2327 mutex_unlock(&root->log_mutex); 2329 mutex_unlock(&root->log_mutex);
2328 goto out; 2330 goto out;
2329 } 2331 }
@@ -2363,6 +2365,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2363 } 2365 }
2364 root->fs_info->last_trans_log_full_commit = trans->transid; 2366 root->fs_info->last_trans_log_full_commit = trans->transid;
2365 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); 2367 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
2368 btrfs_free_logged_extents(log, log_transid);
2366 mutex_unlock(&log_root_tree->log_mutex); 2369 mutex_unlock(&log_root_tree->log_mutex);
2367 ret = -EAGAIN; 2370 ret = -EAGAIN;
2368 goto out; 2371 goto out;
@@ -2373,6 +2376,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2373 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); 2376 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
2374 wait_log_commit(trans, log_root_tree, 2377 wait_log_commit(trans, log_root_tree,
2375 log_root_tree->log_transid); 2378 log_root_tree->log_transid);
2379 btrfs_free_logged_extents(log, log_transid);
2376 mutex_unlock(&log_root_tree->log_mutex); 2380 mutex_unlock(&log_root_tree->log_mutex);
2377 ret = 0; 2381 ret = 0;
2378 goto out; 2382 goto out;
@@ -2392,6 +2396,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2392 */ 2396 */
2393 if (root->fs_info->last_trans_log_full_commit == trans->transid) { 2397 if (root->fs_info->last_trans_log_full_commit == trans->transid) {
2394 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); 2398 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
2399 btrfs_free_logged_extents(log, log_transid);
2395 mutex_unlock(&log_root_tree->log_mutex); 2400 mutex_unlock(&log_root_tree->log_mutex);
2396 ret = -EAGAIN; 2401 ret = -EAGAIN;
2397 goto out_wake_log_root; 2402 goto out_wake_log_root;
@@ -2402,10 +2407,12 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2402 EXTENT_DIRTY | EXTENT_NEW); 2407 EXTENT_DIRTY | EXTENT_NEW);
2403 if (ret) { 2408 if (ret) {
2404 btrfs_abort_transaction(trans, root, ret); 2409 btrfs_abort_transaction(trans, root, ret);
2410 btrfs_free_logged_extents(log, log_transid);
2405 mutex_unlock(&log_root_tree->log_mutex); 2411 mutex_unlock(&log_root_tree->log_mutex);
2406 goto out_wake_log_root; 2412 goto out_wake_log_root;
2407 } 2413 }
2408 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); 2414 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
2415 btrfs_wait_logged_extents(log, log_transid);
2409 2416
2410 btrfs_set_super_log_root(root->fs_info->super_for_commit, 2417 btrfs_set_super_log_root(root->fs_info->super_for_commit,
2411 log_root_tree->node->start); 2418 log_root_tree->node->start);
@@ -2475,6 +2482,14 @@ static void free_log_tree(struct btrfs_trans_handle *trans,
2475 EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS); 2482 EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS);
2476 } 2483 }
2477 2484
2485 /*
2486 * We may have short-circuited the log tree with the full commit logic
2487 * and left ordered extents on our list, so clear these out to keep us
2488 * from leaking inodes and memory.
2489 */
2490 btrfs_free_logged_extents(log, 0);
2491 btrfs_free_logged_extents(log, 1);
2492
2478 free_extent_buffer(log->node); 2493 free_extent_buffer(log->node);
2479 kfree(log); 2494 kfree(log);
2480} 2495}
@@ -3271,14 +3286,18 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
3271 struct btrfs_root *log = root->log_root; 3286 struct btrfs_root *log = root->log_root;
3272 struct btrfs_file_extent_item *fi; 3287 struct btrfs_file_extent_item *fi;
3273 struct extent_buffer *leaf; 3288 struct extent_buffer *leaf;
3289 struct btrfs_ordered_extent *ordered;
3274 struct list_head ordered_sums; 3290 struct list_head ordered_sums;
3275 struct btrfs_map_token token; 3291 struct btrfs_map_token token;
3276 struct btrfs_key key; 3292 struct btrfs_key key;
3277 u64 csum_offset = em->mod_start - em->start; 3293 u64 mod_start = em->mod_start;
3278 u64 csum_len = em->mod_len; 3294 u64 mod_len = em->mod_len;
3295 u64 csum_offset;
3296 u64 csum_len;
3279 u64 extent_offset = em->start - em->orig_start; 3297 u64 extent_offset = em->start - em->orig_start;
3280 u64 block_len; 3298 u64 block_len;
3281 int ret; 3299 int ret;
3300 int index = log->log_transid % 2;
3282 bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; 3301 bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
3283 3302
3284 INIT_LIST_HEAD(&ordered_sums); 3303 INIT_LIST_HEAD(&ordered_sums);
@@ -3362,6 +3381,92 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
3362 csum_len = block_len; 3381 csum_len = block_len;
3363 } 3382 }
3364 3383
3384 /*
3385 * First check and see if our csums are on our outstanding ordered
3386 * extents.
3387 */
3388again:
3389 spin_lock_irq(&log->log_extents_lock[index]);
3390 list_for_each_entry(ordered, &log->logged_list[index], log_list) {
3391 struct btrfs_ordered_sum *sum;
3392
3393 if (!mod_len)
3394 break;
3395
3396 if (ordered->inode != inode)
3397 continue;
3398
3399 if (ordered->file_offset + ordered->len <= mod_start ||
3400 mod_start + mod_len <= ordered->file_offset)
3401 continue;
3402
3403 /*
3404 * We are going to copy all the csums on this ordered extent, so
3405 * go ahead and adjust mod_start and mod_len in case this
3406 * ordered extent has already been logged.
3407 */
3408 if (ordered->file_offset > mod_start) {
3409 if (ordered->file_offset + ordered->len >=
3410 mod_start + mod_len)
3411 mod_len = ordered->file_offset - mod_start;
3412 /*
3413 * If we have this case
3414 *
3415 * |--------- logged extent ---------|
3416 * |----- ordered extent ----|
3417 *
3418 * Just don't mess with mod_start and mod_len, we'll
3419 * just end up logging more csums than we need and it
3420 * will be ok.
3421 */
3422 } else {
3423 if (ordered->file_offset + ordered->len <
3424 mod_start + mod_len) {
3425 mod_len = (mod_start + mod_len) -
3426 (ordered->file_offset + ordered->len);
3427 mod_start = ordered->file_offset +
3428 ordered->len;
3429 } else {
3430 mod_len = 0;
3431 }
3432 }
3433
3434 /*
3435 * To keep us from looping for the above case of an ordered
3436 * extent that falls inside of the logged extent.
3437 */
3438 if (test_and_set_bit(BTRFS_ORDERED_LOGGED_CSUM,
3439 &ordered->flags))
3440 continue;
3441 atomic_inc(&ordered->refs);
3442 spin_unlock_irq(&log->log_extents_lock[index]);
3443 /*
3444 * we've dropped the lock, we must either break or
3445 * start over after this.
3446 */
3447
3448 wait_event(ordered->wait, ordered->csum_bytes_left == 0);
3449
3450 list_for_each_entry(sum, &ordered->list, list) {
3451 ret = btrfs_csum_file_blocks(trans, log, sum);
3452 if (ret) {
3453 btrfs_put_ordered_extent(ordered);
3454 goto unlocked;
3455 }
3456 }
3457 btrfs_put_ordered_extent(ordered);
3458 goto again;
3459
3460 }
3461 spin_unlock_irq(&log->log_extents_lock[index]);
3462unlocked:
3463
3464 if (!mod_len || ret)
3465 return ret;
3466
3467 csum_offset = mod_start - em->start;
3468 csum_len = mod_len;
3469
3365 /* block start is already adjusted for the file extent offset. */ 3470 /* block start is already adjusted for the file extent offset. */
3366 ret = btrfs_lookup_csums_range(log->fs_info->csum_root, 3471 ret = btrfs_lookup_csums_range(log->fs_info->csum_root,
3367 em->block_start + csum_offset, 3472 em->block_start + csum_offset,
@@ -3393,6 +3498,7 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
3393 struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree; 3498 struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree;
3394 u64 test_gen; 3499 u64 test_gen;
3395 int ret = 0; 3500 int ret = 0;
3501 int num = 0;
3396 3502
3397 INIT_LIST_HEAD(&extents); 3503 INIT_LIST_HEAD(&extents);
3398 3504
@@ -3401,16 +3507,31 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
3401 3507
3402 list_for_each_entry_safe(em, n, &tree->modified_extents, list) { 3508 list_for_each_entry_safe(em, n, &tree->modified_extents, list) {
3403 list_del_init(&em->list); 3509 list_del_init(&em->list);
3510
3511 /*
3512 * Just an arbitrary number, this can be really CPU intensive
3513 * once we start getting a lot of extents, and really once we
3514 * have a bunch of extents we just want to commit since it will
3515 * be faster.
3516 */
3517 if (++num > 32768) {
3518 list_del_init(&tree->modified_extents);
3519 ret = -EFBIG;
3520 goto process;
3521 }
3522
3404 if (em->generation <= test_gen) 3523 if (em->generation <= test_gen)
3405 continue; 3524 continue;
3406 /* Need a ref to keep it from getting evicted from cache */ 3525 /* Need a ref to keep it from getting evicted from cache */
3407 atomic_inc(&em->refs); 3526 atomic_inc(&em->refs);
3408 set_bit(EXTENT_FLAG_LOGGING, &em->flags); 3527 set_bit(EXTENT_FLAG_LOGGING, &em->flags);
3409 list_add_tail(&em->list, &extents); 3528 list_add_tail(&em->list, &extents);
3529 num++;
3410 } 3530 }
3411 3531
3412 list_sort(NULL, &extents, extent_cmp); 3532 list_sort(NULL, &extents, extent_cmp);
3413 3533
3534process:
3414 while (!list_empty(&extents)) { 3535 while (!list_empty(&extents)) {
3415 em = list_entry(extents.next, struct extent_map, list); 3536 em = list_entry(extents.next, struct extent_map, list);
3416 3537
@@ -3513,6 +3634,8 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
3513 3634
3514 mutex_lock(&BTRFS_I(inode)->log_mutex); 3635 mutex_lock(&BTRFS_I(inode)->log_mutex);
3515 3636
3637 btrfs_get_logged_extents(log, inode);
3638
3516 /* 3639 /*
3517 * a brute force approach to making sure we get the most uptodate 3640 * a brute force approach to making sure we get the most uptodate
3518 * copies of everything. 3641 * copies of everything.
@@ -3656,6 +3779,8 @@ log_extents:
3656 BTRFS_I(inode)->logged_trans = trans->transid; 3779 BTRFS_I(inode)->logged_trans = trans->transid;
3657 BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans; 3780 BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans;
3658out_unlock: 3781out_unlock:
3782 if (err)
3783 btrfs_free_logged_extents(log, log->log_transid);
3659 mutex_unlock(&BTRFS_I(inode)->log_mutex); 3784 mutex_unlock(&BTRFS_I(inode)->log_mutex);
3660 3785
3661 btrfs_free_path(path); 3786 btrfs_free_path(path);
@@ -3822,7 +3947,6 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
3822end_trans: 3947end_trans:
3823 dput(old_parent); 3948 dput(old_parent);
3824 if (ret < 0) { 3949 if (ret < 0) {
3825 WARN_ON(ret != -ENOSPC);
3826 root->fs_info->last_trans_log_full_commit = trans->transid; 3950 root->fs_info->last_trans_log_full_commit = trans->transid;
3827 ret = 1; 3951 ret = 1;
3828 } 3952 }