aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/inode.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r-- fs/btrfs/inode.c 1762
1 files changed, 1385 insertions, 377 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 2bfdc641d4e3..c03864406af3 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -252,6 +252,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
252 inline_len, compressed_size, 252 inline_len, compressed_size,
253 compressed_pages); 253 compressed_pages);
254 BUG_ON(ret); 254 BUG_ON(ret);
255 btrfs_delalloc_release_metadata(inode, end + 1 - start);
255 btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0); 256 btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0);
256 return 0; 257 return 0;
257} 258}
@@ -414,6 +415,7 @@ again:
414 trans = btrfs_join_transaction(root, 1); 415 trans = btrfs_join_transaction(root, 1);
415 BUG_ON(!trans); 416 BUG_ON(!trans);
416 btrfs_set_trans_block_group(trans, inode); 417 btrfs_set_trans_block_group(trans, inode);
418 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
417 419
418 /* lets try to make an inline extent */ 420 /* lets try to make an inline extent */
419 if (ret || total_in < (actual_end - start)) { 421 if (ret || total_in < (actual_end - start)) {
@@ -439,7 +441,6 @@ again:
439 start, end, NULL, 441 start, end, NULL,
440 EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY | 442 EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY |
441 EXTENT_CLEAR_DELALLOC | 443 EXTENT_CLEAR_DELALLOC |
442 EXTENT_CLEAR_ACCOUNTING |
443 EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK); 444 EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK);
444 445
445 btrfs_end_transaction(trans, root); 446 btrfs_end_transaction(trans, root);
@@ -697,6 +698,38 @@ retry:
697 return 0; 698 return 0;
698} 699}
699 700
701static u64 get_extent_allocation_hint(struct inode *inode, u64 start,
702 u64 num_bytes)
703{
704 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
705 struct extent_map *em;
706 u64 alloc_hint = 0;
707
708 read_lock(&em_tree->lock);
709 em = search_extent_mapping(em_tree, start, num_bytes);
710 if (em) {
711 /*
712 * if block start isn't an actual block number then find the
713 * first block in this inode and use that as a hint. If that
714 * block is also bogus then just don't worry about it.
715 */
716 if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
717 free_extent_map(em);
718 em = search_extent_mapping(em_tree, 0, 0);
719 if (em && em->block_start < EXTENT_MAP_LAST_BYTE)
720 alloc_hint = em->block_start;
721 if (em)
722 free_extent_map(em);
723 } else {
724 alloc_hint = em->block_start;
725 free_extent_map(em);
726 }
727 }
728 read_unlock(&em_tree->lock);
729
730 return alloc_hint;
731}
732
700/* 733/*
701 * when extent_io.c finds a delayed allocation range in the file, 734 * when extent_io.c finds a delayed allocation range in the file,
702 * the call backs end up in this code. The basic idea is to 735 * the call backs end up in this code. The basic idea is to
@@ -734,6 +767,7 @@ static noinline int cow_file_range(struct inode *inode,
734 trans = btrfs_join_transaction(root, 1); 767 trans = btrfs_join_transaction(root, 1);
735 BUG_ON(!trans); 768 BUG_ON(!trans);
736 btrfs_set_trans_block_group(trans, inode); 769 btrfs_set_trans_block_group(trans, inode);
770 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
737 771
738 actual_end = min_t(u64, isize, end + 1); 772 actual_end = min_t(u64, isize, end + 1);
739 773
@@ -753,7 +787,6 @@ static noinline int cow_file_range(struct inode *inode,
753 EXTENT_CLEAR_UNLOCK_PAGE | 787 EXTENT_CLEAR_UNLOCK_PAGE |
754 EXTENT_CLEAR_UNLOCK | 788 EXTENT_CLEAR_UNLOCK |
755 EXTENT_CLEAR_DELALLOC | 789 EXTENT_CLEAR_DELALLOC |
756 EXTENT_CLEAR_ACCOUNTING |
757 EXTENT_CLEAR_DIRTY | 790 EXTENT_CLEAR_DIRTY |
758 EXTENT_SET_WRITEBACK | 791 EXTENT_SET_WRITEBACK |
759 EXTENT_END_WRITEBACK); 792 EXTENT_END_WRITEBACK);
@@ -769,29 +802,7 @@ static noinline int cow_file_range(struct inode *inode,
769 BUG_ON(disk_num_bytes > 802 BUG_ON(disk_num_bytes >
770 btrfs_super_total_bytes(&root->fs_info->super_copy)); 803 btrfs_super_total_bytes(&root->fs_info->super_copy));
771 804
772 805 alloc_hint = get_extent_allocation_hint(inode, start, num_bytes);
773 read_lock(&BTRFS_I(inode)->extent_tree.lock);
774 em = search_extent_mapping(&BTRFS_I(inode)->extent_tree,
775 start, num_bytes);
776 if (em) {
777 /*
778 * if block start isn't an actual block number then find the
779 * first block in this inode and use that as a hint. If that
780 * block is also bogus then just don't worry about it.
781 */
782 if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
783 free_extent_map(em);
784 em = search_extent_mapping(em_tree, 0, 0);
785 if (em && em->block_start < EXTENT_MAP_LAST_BYTE)
786 alloc_hint = em->block_start;
787 if (em)
788 free_extent_map(em);
789 } else {
790 alloc_hint = em->block_start;
791 free_extent_map(em);
792 }
793 }
794 read_unlock(&BTRFS_I(inode)->extent_tree.lock);
795 btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0); 806 btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0);
796 807
797 while (disk_num_bytes > 0) { 808 while (disk_num_bytes > 0) {
@@ -1174,6 +1185,13 @@ out_check:
1174 num_bytes, num_bytes, type); 1185 num_bytes, num_bytes, type);
1175 BUG_ON(ret); 1186 BUG_ON(ret);
1176 1187
1188 if (root->root_key.objectid ==
1189 BTRFS_DATA_RELOC_TREE_OBJECTID) {
1190 ret = btrfs_reloc_clone_csums(inode, cur_offset,
1191 num_bytes);
1192 BUG_ON(ret);
1193 }
1194
1177 extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, 1195 extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
1178 cur_offset, cur_offset + num_bytes - 1, 1196 cur_offset, cur_offset + num_bytes - 1,
1179 locked_page, EXTENT_CLEAR_UNLOCK_PAGE | 1197 locked_page, EXTENT_CLEAR_UNLOCK_PAGE |
@@ -1226,15 +1244,13 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page,
1226} 1244}
1227 1245
1228static int btrfs_split_extent_hook(struct inode *inode, 1246static int btrfs_split_extent_hook(struct inode *inode,
1229 struct extent_state *orig, u64 split) 1247 struct extent_state *orig, u64 split)
1230{ 1248{
1249 /* not delalloc, ignore it */
1231 if (!(orig->state & EXTENT_DELALLOC)) 1250 if (!(orig->state & EXTENT_DELALLOC))
1232 return 0; 1251 return 0;
1233 1252
1234 spin_lock(&BTRFS_I(inode)->accounting_lock); 1253 atomic_inc(&BTRFS_I(inode)->outstanding_extents);
1235 BTRFS_I(inode)->outstanding_extents++;
1236 spin_unlock(&BTRFS_I(inode)->accounting_lock);
1237
1238 return 0; 1254 return 0;
1239} 1255}
1240 1256
@@ -1252,10 +1268,7 @@ static int btrfs_merge_extent_hook(struct inode *inode,
1252 if (!(other->state & EXTENT_DELALLOC)) 1268 if (!(other->state & EXTENT_DELALLOC))
1253 return 0; 1269 return 0;
1254 1270
1255 spin_lock(&BTRFS_I(inode)->accounting_lock); 1271 atomic_dec(&BTRFS_I(inode)->outstanding_extents);
1256 BTRFS_I(inode)->outstanding_extents--;
1257 spin_unlock(&BTRFS_I(inode)->accounting_lock);
1258
1259 return 0; 1272 return 0;
1260} 1273}
1261 1274
@@ -1264,8 +1277,8 @@ static int btrfs_merge_extent_hook(struct inode *inode,
1264 * bytes in this file, and to maintain the list of inodes that 1277 * bytes in this file, and to maintain the list of inodes that
1265 * have pending delalloc work to be done. 1278 * have pending delalloc work to be done.
1266 */ 1279 */
1267static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, 1280static int btrfs_set_bit_hook(struct inode *inode,
1268 unsigned long old, unsigned long bits) 1281 struct extent_state *state, int *bits)
1269{ 1282{
1270 1283
1271 /* 1284 /*
@@ -1273,17 +1286,18 @@ static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end,
1273 * but in this case, we are only testing for the DELALLOC 1286 * but in this case, we are only testing for the DELALLOC
1274 * bit, which is only set or cleared with irqs on 1287 * bit, which is only set or cleared with irqs on
1275 */ 1288 */
1276 if (!(old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { 1289 if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
1277 struct btrfs_root *root = BTRFS_I(inode)->root; 1290 struct btrfs_root *root = BTRFS_I(inode)->root;
1291 u64 len = state->end + 1 - state->start;
1278 1292
1279 spin_lock(&BTRFS_I(inode)->accounting_lock); 1293 if (*bits & EXTENT_FIRST_DELALLOC)
1280 BTRFS_I(inode)->outstanding_extents++; 1294 *bits &= ~EXTENT_FIRST_DELALLOC;
1281 spin_unlock(&BTRFS_I(inode)->accounting_lock); 1295 else
1282 btrfs_delalloc_reserve_space(root, inode, end - start + 1); 1296 atomic_inc(&BTRFS_I(inode)->outstanding_extents);
1283 1297
1284 spin_lock(&root->fs_info->delalloc_lock); 1298 spin_lock(&root->fs_info->delalloc_lock);
1285 BTRFS_I(inode)->delalloc_bytes += end - start + 1; 1299 BTRFS_I(inode)->delalloc_bytes += len;
1286 root->fs_info->delalloc_bytes += end - start + 1; 1300 root->fs_info->delalloc_bytes += len;
1287 if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) { 1301 if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
1288 list_add_tail(&BTRFS_I(inode)->delalloc_inodes, 1302 list_add_tail(&BTRFS_I(inode)->delalloc_inodes,
1289 &root->fs_info->delalloc_inodes); 1303 &root->fs_info->delalloc_inodes);
@@ -1297,45 +1311,32 @@ static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end,
1297 * extent_io.c clear_bit_hook, see set_bit_hook for why 1311 * extent_io.c clear_bit_hook, see set_bit_hook for why
1298 */ 1312 */
1299static int btrfs_clear_bit_hook(struct inode *inode, 1313static int btrfs_clear_bit_hook(struct inode *inode,
1300 struct extent_state *state, unsigned long bits) 1314 struct extent_state *state, int *bits)
1301{ 1315{
1302 /* 1316 /*
1303 * set_bit and clear bit hooks normally require _irqsave/restore 1317 * set_bit and clear bit hooks normally require _irqsave/restore
1304 * but in this case, we are only testing for the DELALLOC 1318 * but in this case, we are only testing for the DELALLOC
1305 * bit, which is only set or cleared with irqs on 1319 * bit, which is only set or cleared with irqs on
1306 */ 1320 */
1307 if ((state->state & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { 1321 if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
1308 struct btrfs_root *root = BTRFS_I(inode)->root; 1322 struct btrfs_root *root = BTRFS_I(inode)->root;
1323 u64 len = state->end + 1 - state->start;
1309 1324
1310 if (bits & EXTENT_DO_ACCOUNTING) { 1325 if (*bits & EXTENT_FIRST_DELALLOC)
1311 spin_lock(&BTRFS_I(inode)->accounting_lock); 1326 *bits &= ~EXTENT_FIRST_DELALLOC;
1312 WARN_ON(!BTRFS_I(inode)->outstanding_extents); 1327 else if (!(*bits & EXTENT_DO_ACCOUNTING))
1313 BTRFS_I(inode)->outstanding_extents--; 1328 atomic_dec(&BTRFS_I(inode)->outstanding_extents);
1314 spin_unlock(&BTRFS_I(inode)->accounting_lock); 1329
1315 btrfs_unreserve_metadata_for_delalloc(root, inode, 1); 1330 if (*bits & EXTENT_DO_ACCOUNTING)
1316 } 1331 btrfs_delalloc_release_metadata(inode, len);
1332
1333 if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID)
1334 btrfs_free_reserved_data_space(inode, len);
1317 1335
1318 spin_lock(&root->fs_info->delalloc_lock); 1336 spin_lock(&root->fs_info->delalloc_lock);
1319 if (state->end - state->start + 1 > 1337 root->fs_info->delalloc_bytes -= len;
1320 root->fs_info->delalloc_bytes) { 1338 BTRFS_I(inode)->delalloc_bytes -= len;
1321 printk(KERN_INFO "btrfs warning: delalloc account " 1339
1322 "%llu %llu\n",
1323 (unsigned long long)
1324 state->end - state->start + 1,
1325 (unsigned long long)
1326 root->fs_info->delalloc_bytes);
1327 btrfs_delalloc_free_space(root, inode, (u64)-1);
1328 root->fs_info->delalloc_bytes = 0;
1329 BTRFS_I(inode)->delalloc_bytes = 0;
1330 } else {
1331 btrfs_delalloc_free_space(root, inode,
1332 state->end -
1333 state->start + 1);
1334 root->fs_info->delalloc_bytes -= state->end -
1335 state->start + 1;
1336 BTRFS_I(inode)->delalloc_bytes -= state->end -
1337 state->start + 1;
1338 }
1339 if (BTRFS_I(inode)->delalloc_bytes == 0 && 1340 if (BTRFS_I(inode)->delalloc_bytes == 0 &&
1340 !list_empty(&BTRFS_I(inode)->delalloc_inodes)) { 1341 !list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
1341 list_del_init(&BTRFS_I(inode)->delalloc_inodes); 1342 list_del_init(&BTRFS_I(inode)->delalloc_inodes);
@@ -1384,7 +1385,8 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
1384 */ 1385 */
1385static int __btrfs_submit_bio_start(struct inode *inode, int rw, 1386static int __btrfs_submit_bio_start(struct inode *inode, int rw,
1386 struct bio *bio, int mirror_num, 1387 struct bio *bio, int mirror_num,
1387 unsigned long bio_flags) 1388 unsigned long bio_flags,
1389 u64 bio_offset)
1388{ 1390{
1389 struct btrfs_root *root = BTRFS_I(inode)->root; 1391 struct btrfs_root *root = BTRFS_I(inode)->root;
1390 int ret = 0; 1392 int ret = 0;
@@ -1403,7 +1405,8 @@ static int __btrfs_submit_bio_start(struct inode *inode, int rw,
1403 * are inserted into the btree 1405 * are inserted into the btree
1404 */ 1406 */
1405static int __btrfs_submit_bio_done(struct inode *inode, int rw, struct bio *bio, 1407static int __btrfs_submit_bio_done(struct inode *inode, int rw, struct bio *bio,
1406 int mirror_num, unsigned long bio_flags) 1408 int mirror_num, unsigned long bio_flags,
1409 u64 bio_offset)
1407{ 1410{
1408 struct btrfs_root *root = BTRFS_I(inode)->root; 1411 struct btrfs_root *root = BTRFS_I(inode)->root;
1409 return btrfs_map_bio(root, rw, bio, mirror_num, 1); 1412 return btrfs_map_bio(root, rw, bio, mirror_num, 1);
@@ -1414,7 +1417,8 @@ static int __btrfs_submit_bio_done(struct inode *inode, int rw, struct bio *bio,
1414 * on write, or reading the csums from the tree before a read 1417 * on write, or reading the csums from the tree before a read
1415 */ 1418 */
1416static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, 1419static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
1417 int mirror_num, unsigned long bio_flags) 1420 int mirror_num, unsigned long bio_flags,
1421 u64 bio_offset)
1418{ 1422{
1419 struct btrfs_root *root = BTRFS_I(inode)->root; 1423 struct btrfs_root *root = BTRFS_I(inode)->root;
1420 int ret = 0; 1424 int ret = 0;
@@ -1425,7 +1429,7 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
1425 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); 1429 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
1426 BUG_ON(ret); 1430 BUG_ON(ret);
1427 1431
1428 if (!(rw & (1 << BIO_RW))) { 1432 if (!(rw & REQ_WRITE)) {
1429 if (bio_flags & EXTENT_BIO_COMPRESSED) { 1433 if (bio_flags & EXTENT_BIO_COMPRESSED) {
1430 return btrfs_submit_compressed_read(inode, bio, 1434 return btrfs_submit_compressed_read(inode, bio,
1431 mirror_num, bio_flags); 1435 mirror_num, bio_flags);
@@ -1439,7 +1443,8 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
1439 /* we're doing a write, do the async checksumming */ 1443 /* we're doing a write, do the async checksumming */
1440 return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, 1444 return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
1441 inode, rw, bio, mirror_num, 1445 inode, rw, bio, mirror_num,
1442 bio_flags, __btrfs_submit_bio_start, 1446 bio_flags, bio_offset,
1447 __btrfs_submit_bio_start,
1443 __btrfs_submit_bio_done); 1448 __btrfs_submit_bio_done);
1444 } 1449 }
1445 1450
@@ -1520,6 +1525,7 @@ again:
1520 goto again; 1525 goto again;
1521 } 1526 }
1522 1527
1528 BUG();
1523 btrfs_set_extent_delalloc(inode, page_start, page_end, &cached_state); 1529 btrfs_set_extent_delalloc(inode, page_start, page_end, &cached_state);
1524 ClearPageChecked(page); 1530 ClearPageChecked(page);
1525out: 1531out:
@@ -1650,7 +1656,7 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1650static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) 1656static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1651{ 1657{
1652 struct btrfs_root *root = BTRFS_I(inode)->root; 1658 struct btrfs_root *root = BTRFS_I(inode)->root;
1653 struct btrfs_trans_handle *trans; 1659 struct btrfs_trans_handle *trans = NULL;
1654 struct btrfs_ordered_extent *ordered_extent = NULL; 1660 struct btrfs_ordered_extent *ordered_extent = NULL;
1655 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 1661 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
1656 struct extent_state *cached_state = NULL; 1662 struct extent_state *cached_state = NULL;
@@ -1668,9 +1674,10 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1668 ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); 1674 ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
1669 if (!ret) { 1675 if (!ret) {
1670 trans = btrfs_join_transaction(root, 1); 1676 trans = btrfs_join_transaction(root, 1);
1677 btrfs_set_trans_block_group(trans, inode);
1678 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
1671 ret = btrfs_update_inode(trans, root, inode); 1679 ret = btrfs_update_inode(trans, root, inode);
1672 BUG_ON(ret); 1680 BUG_ON(ret);
1673 btrfs_end_transaction(trans, root);
1674 } 1681 }
1675 goto out; 1682 goto out;
1676 } 1683 }
@@ -1680,6 +1687,8 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1680 0, &cached_state, GFP_NOFS); 1687 0, &cached_state, GFP_NOFS);
1681 1688
1682 trans = btrfs_join_transaction(root, 1); 1689 trans = btrfs_join_transaction(root, 1);
1690 btrfs_set_trans_block_group(trans, inode);
1691 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
1683 1692
1684 if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) 1693 if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags))
1685 compressed = 1; 1694 compressed = 1;
@@ -1711,12 +1720,13 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1711 add_pending_csums(trans, inode, ordered_extent->file_offset, 1720 add_pending_csums(trans, inode, ordered_extent->file_offset,
1712 &ordered_extent->list); 1721 &ordered_extent->list);
1713 1722
1714 /* this also removes the ordered extent from the tree */
1715 btrfs_ordered_update_i_size(inode, 0, ordered_extent); 1723 btrfs_ordered_update_i_size(inode, 0, ordered_extent);
1716 ret = btrfs_update_inode(trans, root, inode); 1724 ret = btrfs_update_inode(trans, root, inode);
1717 BUG_ON(ret); 1725 BUG_ON(ret);
1718 btrfs_end_transaction(trans, root);
1719out: 1726out:
1727 btrfs_delalloc_release_metadata(inode, ordered_extent->len);
1728 if (trans)
1729 btrfs_end_transaction(trans, root);
1720 /* once for us */ 1730 /* once for us */
1721 btrfs_put_ordered_extent(ordered_extent); 1731 btrfs_put_ordered_extent(ordered_extent);
1722 /* once for the tree */ 1732 /* once for the tree */
@@ -1831,14 +1841,14 @@ static int btrfs_io_failed_hook(struct bio *failed_bio,
1831 bio->bi_size = 0; 1841 bio->bi_size = 0;
1832 1842
1833 bio_add_page(bio, page, failrec->len, start - page_offset(page)); 1843 bio_add_page(bio, page, failrec->len, start - page_offset(page));
1834 if (failed_bio->bi_rw & (1 << BIO_RW)) 1844 if (failed_bio->bi_rw & REQ_WRITE)
1835 rw = WRITE; 1845 rw = WRITE;
1836 else 1846 else
1837 rw = READ; 1847 rw = READ;
1838 1848
1839 BTRFS_I(inode)->io_tree.ops->submit_bio_hook(inode, rw, bio, 1849 BTRFS_I(inode)->io_tree.ops->submit_bio_hook(inode, rw, bio,
1840 failrec->last_mirror, 1850 failrec->last_mirror,
1841 failrec->bio_flags); 1851 failrec->bio_flags, 0);
1842 return 0; 1852 return 0;
1843} 1853}
1844 1854
@@ -1993,32 +2003,196 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root)
1993} 2003}
1994 2004
1995/* 2005/*
2006 * calculate extra metadata reservation when snapshotting a subvolume
2007 * that contains orphan files.
2008 */
2009void btrfs_orphan_pre_snapshot(struct btrfs_trans_handle *trans,
2010 struct btrfs_pending_snapshot *pending,
2011 u64 *bytes_to_reserve)
2012{
2013 struct btrfs_root *root;
2014 struct btrfs_block_rsv *block_rsv;
2015 u64 num_bytes;
2016 int index;
2017
2018 root = pending->root;
2019 if (!root->orphan_block_rsv || list_empty(&root->orphan_list))
2020 return;
2021
2022 block_rsv = root->orphan_block_rsv;
2023
2024 /* orphan block reservation for the snapshot */
2025 num_bytes = block_rsv->size;
2026
2027 /*
2028 * after the snapshot is created, COWing tree blocks may use more
2029 * space than it frees. So we should make sure there is enough
2030 * reserved space.
2031 */
2032 index = trans->transid & 0x1;
2033 if (block_rsv->reserved + block_rsv->freed[index] < block_rsv->size) {
2034 num_bytes += block_rsv->size -
2035 (block_rsv->reserved + block_rsv->freed[index]);
2036 }
2037
2038 *bytes_to_reserve += num_bytes;
2039}
2040
2041void btrfs_orphan_post_snapshot(struct btrfs_trans_handle *trans,
2042 struct btrfs_pending_snapshot *pending)
2043{
2044 struct btrfs_root *root = pending->root;
2045 struct btrfs_root *snap = pending->snap;
2046 struct btrfs_block_rsv *block_rsv;
2047 u64 num_bytes;
2048 int index;
2049 int ret;
2050
2051 if (!root->orphan_block_rsv || list_empty(&root->orphan_list))
2052 return;
2053
2054 /* refill source subvolume's orphan block reservation */
2055 block_rsv = root->orphan_block_rsv;
2056 index = trans->transid & 0x1;
2057 if (block_rsv->reserved + block_rsv->freed[index] < block_rsv->size) {
2058 num_bytes = block_rsv->size -
2059 (block_rsv->reserved + block_rsv->freed[index]);
2060 ret = btrfs_block_rsv_migrate(&pending->block_rsv,
2061 root->orphan_block_rsv,
2062 num_bytes);
2063 BUG_ON(ret);
2064 }
2065
2066 /* setup orphan block reservation for the snapshot */
2067 block_rsv = btrfs_alloc_block_rsv(snap);
2068 BUG_ON(!block_rsv);
2069
2070 btrfs_add_durable_block_rsv(root->fs_info, block_rsv);
2071 snap->orphan_block_rsv = block_rsv;
2072
2073 num_bytes = root->orphan_block_rsv->size;
2074 ret = btrfs_block_rsv_migrate(&pending->block_rsv,
2075 block_rsv, num_bytes);
2076 BUG_ON(ret);
2077
2078#if 0
2079 /* insert orphan item for the snapshot */
2080 WARN_ON(!root->orphan_item_inserted);
2081 ret = btrfs_insert_orphan_item(trans, root->fs_info->tree_root,
2082 snap->root_key.objectid);
2083 BUG_ON(ret);
2084 snap->orphan_item_inserted = 1;
2085#endif
2086}
2087
2088enum btrfs_orphan_cleanup_state {
2089 ORPHAN_CLEANUP_STARTED = 1,
2090 ORPHAN_CLEANUP_DONE = 2,
2091};
2092
2093/*
2094 * This is called at transaction commit time. If there are no orphan
2095 * files in the subvolume, it removes orphan item and frees block_rsv
2096 * structure.
2097 */
2098void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans,
2099 struct btrfs_root *root)
2100{
2101 int ret;
2102
2103 if (!list_empty(&root->orphan_list) ||
2104 root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE)
2105 return;
2106
2107 if (root->orphan_item_inserted &&
2108 btrfs_root_refs(&root->root_item) > 0) {
2109 ret = btrfs_del_orphan_item(trans, root->fs_info->tree_root,
2110 root->root_key.objectid);
2111 BUG_ON(ret);
2112 root->orphan_item_inserted = 0;
2113 }
2114
2115 if (root->orphan_block_rsv) {
2116 WARN_ON(root->orphan_block_rsv->size > 0);
2117 btrfs_free_block_rsv(root, root->orphan_block_rsv);
2118 root->orphan_block_rsv = NULL;
2119 }
2120}
2121
2122/*
1996 * This creates an orphan entry for the given inode in case something goes 2123 * This creates an orphan entry for the given inode in case something goes
1997 * wrong in the middle of an unlink/truncate. 2124 * wrong in the middle of an unlink/truncate.
2125 *
2126 * NOTE: caller of this function should reserve 5 units of metadata for
2127 * this function.
1998 */ 2128 */
1999int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) 2129int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
2000{ 2130{
2001 struct btrfs_root *root = BTRFS_I(inode)->root; 2131 struct btrfs_root *root = BTRFS_I(inode)->root;
2002 int ret = 0; 2132 struct btrfs_block_rsv *block_rsv = NULL;
2133 int reserve = 0;
2134 int insert = 0;
2135 int ret;
2136
2137 if (!root->orphan_block_rsv) {
2138 block_rsv = btrfs_alloc_block_rsv(root);
2139 BUG_ON(!block_rsv);
2140 }
2003 2141
2004 spin_lock(&root->list_lock); 2142 spin_lock(&root->orphan_lock);
2143 if (!root->orphan_block_rsv) {
2144 root->orphan_block_rsv = block_rsv;
2145 } else if (block_rsv) {
2146 btrfs_free_block_rsv(root, block_rsv);
2147 block_rsv = NULL;
2148 }
2005 2149
2006 /* already on the orphan list, we're good */ 2150 if (list_empty(&BTRFS_I(inode)->i_orphan)) {
2007 if (!list_empty(&BTRFS_I(inode)->i_orphan)) { 2151 list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list);
2008 spin_unlock(&root->list_lock); 2152#if 0
2009 return 0; 2153 /*
2154 * For proper ENOSPC handling, we should do orphan
2155 * cleanup when mounting. But this introduces backward
2156 * compatibility issue.
2157 */
2158 if (!xchg(&root->orphan_item_inserted, 1))
2159 insert = 2;
2160 else
2161 insert = 1;
2162#endif
2163 insert = 1;
2164 } else {
2165 WARN_ON(!BTRFS_I(inode)->orphan_meta_reserved);
2010 } 2166 }
2011 2167
2012 list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list); 2168 if (!BTRFS_I(inode)->orphan_meta_reserved) {
2169 BTRFS_I(inode)->orphan_meta_reserved = 1;
2170 reserve = 1;
2171 }
2172 spin_unlock(&root->orphan_lock);
2013 2173
2014 spin_unlock(&root->list_lock); 2174 if (block_rsv)
2175 btrfs_add_durable_block_rsv(root->fs_info, block_rsv);
2015 2176
2016 /* 2177 /* grab metadata reservation from transaction handle */
2017 * insert an orphan item to track this unlinked/truncated file 2178 if (reserve) {
2018 */ 2179 ret = btrfs_orphan_reserve_metadata(trans, inode);
2019 ret = btrfs_insert_orphan_item(trans, root, inode->i_ino); 2180 BUG_ON(ret);
2181 }
2020 2182
2021 return ret; 2183 /* insert an orphan item to track this unlinked/truncated file */
2184 if (insert >= 1) {
2185 ret = btrfs_insert_orphan_item(trans, root, inode->i_ino);
2186 BUG_ON(ret);
2187 }
2188
2189 /* insert an orphan item to record that the subvolume contains orphan files */
2190 if (insert >= 2) {
2191 ret = btrfs_insert_orphan_item(trans, root->fs_info->tree_root,
2192 root->root_key.objectid);
2193 BUG_ON(ret);
2194 }
2195 return 0;
2022} 2196}
2023 2197
2024/* 2198/*
@@ -2028,26 +2202,31 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
2028int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode) 2202int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode)
2029{ 2203{
2030 struct btrfs_root *root = BTRFS_I(inode)->root; 2204 struct btrfs_root *root = BTRFS_I(inode)->root;
2205 int delete_item = 0;
2206 int release_rsv = 0;
2031 int ret = 0; 2207 int ret = 0;
2032 2208
2033 spin_lock(&root->list_lock); 2209 spin_lock(&root->orphan_lock);
2034 2210 if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
2035 if (list_empty(&BTRFS_I(inode)->i_orphan)) { 2211 list_del_init(&BTRFS_I(inode)->i_orphan);
2036 spin_unlock(&root->list_lock); 2212 delete_item = 1;
2037 return 0;
2038 } 2213 }
2039 2214
2040 list_del_init(&BTRFS_I(inode)->i_orphan); 2215 if (BTRFS_I(inode)->orphan_meta_reserved) {
2041 if (!trans) { 2216 BTRFS_I(inode)->orphan_meta_reserved = 0;
2042 spin_unlock(&root->list_lock); 2217 release_rsv = 1;
2043 return 0;
2044 } 2218 }
2219 spin_unlock(&root->orphan_lock);
2045 2220
2046 spin_unlock(&root->list_lock); 2221 if (trans && delete_item) {
2222 ret = btrfs_del_orphan_item(trans, root, inode->i_ino);
2223 BUG_ON(ret);
2224 }
2047 2225
2048 ret = btrfs_del_orphan_item(trans, root, inode->i_ino); 2226 if (release_rsv)
2227 btrfs_orphan_release_metadata(inode);
2049 2228
2050 return ret; 2229 return 0;
2051} 2230}
2052 2231
2053/* 2232/*
@@ -2064,7 +2243,7 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
2064 struct inode *inode; 2243 struct inode *inode;
2065 int ret = 0, nr_unlink = 0, nr_truncate = 0; 2244 int ret = 0, nr_unlink = 0, nr_truncate = 0;
2066 2245
2067 if (!xchg(&root->clean_orphans, 0)) 2246 if (cmpxchg(&root->orphan_cleanup_state, 0, ORPHAN_CLEANUP_STARTED))
2068 return; 2247 return;
2069 2248
2070 path = btrfs_alloc_path(); 2249 path = btrfs_alloc_path();
@@ -2117,16 +2296,15 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
2117 found_key.type = BTRFS_INODE_ITEM_KEY; 2296 found_key.type = BTRFS_INODE_ITEM_KEY;
2118 found_key.offset = 0; 2297 found_key.offset = 0;
2119 inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL); 2298 inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL);
2120 if (IS_ERR(inode)) 2299 BUG_ON(IS_ERR(inode));
2121 break;
2122 2300
2123 /* 2301 /*
2124 * add this inode to the orphan list so btrfs_orphan_del does 2302 * add this inode to the orphan list so btrfs_orphan_del does
2125 * the proper thing when we hit it 2303 * the proper thing when we hit it
2126 */ 2304 */
2127 spin_lock(&root->list_lock); 2305 spin_lock(&root->orphan_lock);
2128 list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list); 2306 list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list);
2129 spin_unlock(&root->list_lock); 2307 spin_unlock(&root->orphan_lock);
2130 2308
2131 /* 2309 /*
2132 * if this is a bad inode, means we actually succeeded in 2310 * if this is a bad inode, means we actually succeeded in
@@ -2135,7 +2313,7 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
2135 * do a destroy_inode 2313 * do a destroy_inode
2136 */ 2314 */
2137 if (is_bad_inode(inode)) { 2315 if (is_bad_inode(inode)) {
2138 trans = btrfs_start_transaction(root, 1); 2316 trans = btrfs_start_transaction(root, 0);
2139 btrfs_orphan_del(trans, inode); 2317 btrfs_orphan_del(trans, inode);
2140 btrfs_end_transaction(trans, root); 2318 btrfs_end_transaction(trans, root);
2141 iput(inode); 2319 iput(inode);
@@ -2153,13 +2331,23 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
2153 /* this will do delete_inode and everything for us */ 2331 /* this will do delete_inode and everything for us */
2154 iput(inode); 2332 iput(inode);
2155 } 2333 }
2334 btrfs_free_path(path);
2335
2336 root->orphan_cleanup_state = ORPHAN_CLEANUP_DONE;
2337
2338 if (root->orphan_block_rsv)
2339 btrfs_block_rsv_release(root, root->orphan_block_rsv,
2340 (u64)-1);
2341
2342 if (root->orphan_block_rsv || root->orphan_item_inserted) {
2343 trans = btrfs_join_transaction(root, 1);
2344 btrfs_end_transaction(trans, root);
2345 }
2156 2346
2157 if (nr_unlink) 2347 if (nr_unlink)
2158 printk(KERN_INFO "btrfs: unlinked %d orphans\n", nr_unlink); 2348 printk(KERN_INFO "btrfs: unlinked %d orphans\n", nr_unlink);
2159 if (nr_truncate) 2349 if (nr_truncate)
2160 printk(KERN_INFO "btrfs: truncated %d orphans\n", nr_truncate); 2350 printk(KERN_INFO "btrfs: truncated %d orphans\n", nr_truncate);
2161
2162 btrfs_free_path(path);
2163} 2351}
2164 2352
2165/* 2353/*
@@ -2478,29 +2666,201 @@ out:
2478 return ret; 2666 return ret;
2479} 2667}
2480 2668
2481static int btrfs_unlink(struct inode *dir, struct dentry *dentry) 2669/* helper to check if there is any shared block in the path */
2670static int check_path_shared(struct btrfs_root *root,
2671 struct btrfs_path *path)
2672{
2673 struct extent_buffer *eb;
2674 int level;
2675 int ret;
2676 u64 refs = 1;
2677
2678 for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
2679 if (!path->nodes[level])
2680 break;
2681 eb = path->nodes[level];
2682 if (!btrfs_block_can_be_shared(root, eb))
2683 continue;
2684 ret = btrfs_lookup_extent_info(NULL, root, eb->start, eb->len,
2685 &refs, NULL);
2686 if (refs > 1)
2687 return 1;
2688 }
2689 return 0;
2690}
2691
2692/*
2693 * helper to start transaction for unlink and rmdir.
2694 *
2695 * unlink and rmdir are special in btrfs, they do not always free space.
2696 * so in enospc case, we should make sure they will free space before
2697 * allowing them to use the global metadata reservation.
2698 */
2699static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
2700 struct dentry *dentry)
2482{ 2701{
2483 struct btrfs_root *root;
2484 struct btrfs_trans_handle *trans; 2702 struct btrfs_trans_handle *trans;
2703 struct btrfs_root *root = BTRFS_I(dir)->root;
2704 struct btrfs_path *path;
2705 struct btrfs_inode_ref *ref;
2706 struct btrfs_dir_item *di;
2485 struct inode *inode = dentry->d_inode; 2707 struct inode *inode = dentry->d_inode;
2708 u64 index;
2709 int check_link = 1;
2710 int err = -ENOSPC;
2486 int ret; 2711 int ret;
2487 unsigned long nr = 0;
2488 2712
2489 root = BTRFS_I(dir)->root; 2713 trans = btrfs_start_transaction(root, 10);
2714 if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC)
2715 return trans;
2490 2716
2491 /* 2717 if (inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
2492 * 5 items for unlink inode 2718 return ERR_PTR(-ENOSPC);
2493 * 1 for orphan 2719
2494 */ 2720 /* check if there is someone else holds reference */
2495 ret = btrfs_reserve_metadata_space(root, 6); 2721 if (S_ISDIR(inode->i_mode) && atomic_read(&inode->i_count) > 1)
2496 if (ret) 2722 return ERR_PTR(-ENOSPC);
2497 return ret; 2723
2724 if (atomic_read(&inode->i_count) > 2)
2725 return ERR_PTR(-ENOSPC);
2726
2727 if (xchg(&root->fs_info->enospc_unlink, 1))
2728 return ERR_PTR(-ENOSPC);
2729
2730 path = btrfs_alloc_path();
2731 if (!path) {
2732 root->fs_info->enospc_unlink = 0;
2733 return ERR_PTR(-ENOMEM);
2734 }
2498 2735
2499 trans = btrfs_start_transaction(root, 1); 2736 trans = btrfs_start_transaction(root, 0);
2500 if (IS_ERR(trans)) { 2737 if (IS_ERR(trans)) {
2501 btrfs_unreserve_metadata_space(root, 6); 2738 btrfs_free_path(path);
2502 return PTR_ERR(trans); 2739 root->fs_info->enospc_unlink = 0;
2740 return trans;
2741 }
2742
2743 path->skip_locking = 1;
2744 path->search_commit_root = 1;
2745
2746 ret = btrfs_lookup_inode(trans, root, path,
2747 &BTRFS_I(dir)->location, 0);
2748 if (ret < 0) {
2749 err = ret;
2750 goto out;
2751 }
2752 if (ret == 0) {
2753 if (check_path_shared(root, path))
2754 goto out;
2755 } else {
2756 check_link = 0;
2757 }
2758 btrfs_release_path(root, path);
2759
2760 ret = btrfs_lookup_inode(trans, root, path,
2761 &BTRFS_I(inode)->location, 0);
2762 if (ret < 0) {
2763 err = ret;
2764 goto out;
2765 }
2766 if (ret == 0) {
2767 if (check_path_shared(root, path))
2768 goto out;
2769 } else {
2770 check_link = 0;
2503 } 2771 }
2772 btrfs_release_path(root, path);
2773
2774 if (ret == 0 && S_ISREG(inode->i_mode)) {
2775 ret = btrfs_lookup_file_extent(trans, root, path,
2776 inode->i_ino, (u64)-1, 0);
2777 if (ret < 0) {
2778 err = ret;
2779 goto out;
2780 }
2781 BUG_ON(ret == 0);
2782 if (check_path_shared(root, path))
2783 goto out;
2784 btrfs_release_path(root, path);
2785 }
2786
2787 if (!check_link) {
2788 err = 0;
2789 goto out;
2790 }
2791
2792 di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino,
2793 dentry->d_name.name, dentry->d_name.len, 0);
2794 if (IS_ERR(di)) {
2795 err = PTR_ERR(di);
2796 goto out;
2797 }
2798 if (di) {
2799 if (check_path_shared(root, path))
2800 goto out;
2801 } else {
2802 err = 0;
2803 goto out;
2804 }
2805 btrfs_release_path(root, path);
2806
2807 ref = btrfs_lookup_inode_ref(trans, root, path,
2808 dentry->d_name.name, dentry->d_name.len,
2809 inode->i_ino, dir->i_ino, 0);
2810 if (IS_ERR(ref)) {
2811 err = PTR_ERR(ref);
2812 goto out;
2813 }
2814 BUG_ON(!ref);
2815 if (check_path_shared(root, path))
2816 goto out;
2817 index = btrfs_inode_ref_index(path->nodes[0], ref);
2818 btrfs_release_path(root, path);
2819
2820 di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, index,
2821 dentry->d_name.name, dentry->d_name.len, 0);
2822 if (IS_ERR(di)) {
2823 err = PTR_ERR(di);
2824 goto out;
2825 }
2826 BUG_ON(ret == -ENOENT);
2827 if (check_path_shared(root, path))
2828 goto out;
2829
2830 err = 0;
2831out:
2832 btrfs_free_path(path);
2833 if (err) {
2834 btrfs_end_transaction(trans, root);
2835 root->fs_info->enospc_unlink = 0;
2836 return ERR_PTR(err);
2837 }
2838
2839 trans->block_rsv = &root->fs_info->global_block_rsv;
2840 return trans;
2841}
2842
2843static void __unlink_end_trans(struct btrfs_trans_handle *trans,
2844 struct btrfs_root *root)
2845{
2846 if (trans->block_rsv == &root->fs_info->global_block_rsv) {
2847 BUG_ON(!root->fs_info->enospc_unlink);
2848 root->fs_info->enospc_unlink = 0;
2849 }
2850 btrfs_end_transaction_throttle(trans, root);
2851}
2852
2853static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
2854{
2855 struct btrfs_root *root = BTRFS_I(dir)->root;
2856 struct btrfs_trans_handle *trans;
2857 struct inode *inode = dentry->d_inode;
2858 int ret;
2859 unsigned long nr = 0;
2860
2861 trans = __unlink_start_trans(dir, dentry);
2862 if (IS_ERR(trans))
2863 return PTR_ERR(trans);
2504 2864
2505 btrfs_set_trans_block_group(trans, dir); 2865 btrfs_set_trans_block_group(trans, dir);
2506 2866
@@ -2508,14 +2868,15 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
2508 2868
2509 ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, 2869 ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode,
2510 dentry->d_name.name, dentry->d_name.len); 2870 dentry->d_name.name, dentry->d_name.len);
2871 BUG_ON(ret);
2511 2872
2512 if (inode->i_nlink == 0) 2873 if (inode->i_nlink == 0) {
2513 ret = btrfs_orphan_add(trans, inode); 2874 ret = btrfs_orphan_add(trans, inode);
2875 BUG_ON(ret);
2876 }
2514 2877
2515 nr = trans->blocks_used; 2878 nr = trans->blocks_used;
2516 2879 __unlink_end_trans(trans, root);
2517 btrfs_end_transaction_throttle(trans, root);
2518 btrfs_unreserve_metadata_space(root, 6);
2519 btrfs_btree_balance_dirty(root, nr); 2880 btrfs_btree_balance_dirty(root, nr);
2520 return ret; 2881 return ret;
2521} 2882}
@@ -2577,7 +2938,6 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
2577 dir->i_mtime = dir->i_ctime = CURRENT_TIME; 2938 dir->i_mtime = dir->i_ctime = CURRENT_TIME;
2578 ret = btrfs_update_inode(trans, root, dir); 2939 ret = btrfs_update_inode(trans, root, dir);
2579 BUG_ON(ret); 2940 BUG_ON(ret);
2580 dir->i_sb->s_dirt = 1;
2581 2941
2582 btrfs_free_path(path); 2942 btrfs_free_path(path);
2583 return 0; 2943 return 0;
@@ -2587,7 +2947,6 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
2587{ 2947{
2588 struct inode *inode = dentry->d_inode; 2948 struct inode *inode = dentry->d_inode;
2589 int err = 0; 2949 int err = 0;
2590 int ret;
2591 struct btrfs_root *root = BTRFS_I(dir)->root; 2950 struct btrfs_root *root = BTRFS_I(dir)->root;
2592 struct btrfs_trans_handle *trans; 2951 struct btrfs_trans_handle *trans;
2593 unsigned long nr = 0; 2952 unsigned long nr = 0;
@@ -2596,15 +2955,9 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
2596 inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) 2955 inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)
2597 return -ENOTEMPTY; 2956 return -ENOTEMPTY;
2598 2957
2599 ret = btrfs_reserve_metadata_space(root, 5); 2958 trans = __unlink_start_trans(dir, dentry);
2600 if (ret) 2959 if (IS_ERR(trans))
2601 return ret;
2602
2603 trans = btrfs_start_transaction(root, 1);
2604 if (IS_ERR(trans)) {
2605 btrfs_unreserve_metadata_space(root, 5);
2606 return PTR_ERR(trans); 2960 return PTR_ERR(trans);
2607 }
2608 2961
2609 btrfs_set_trans_block_group(trans, dir); 2962 btrfs_set_trans_block_group(trans, dir);
2610 2963
@@ -2627,12 +2980,9 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
2627 btrfs_i_size_write(inode, 0); 2980 btrfs_i_size_write(inode, 0);
2628out: 2981out:
2629 nr = trans->blocks_used; 2982 nr = trans->blocks_used;
2630 ret = btrfs_end_transaction_throttle(trans, root); 2983 __unlink_end_trans(trans, root);
2631 btrfs_unreserve_metadata_space(root, 5);
2632 btrfs_btree_balance_dirty(root, nr); 2984 btrfs_btree_balance_dirty(root, nr);
2633 2985
2634 if (ret && !err)
2635 err = ret;
2636 return err; 2986 return err;
2637} 2987}
2638 2988
@@ -3029,6 +3379,7 @@ out:
3029 if (pending_del_nr) { 3379 if (pending_del_nr) {
3030 ret = btrfs_del_items(trans, root, path, pending_del_slot, 3380 ret = btrfs_del_items(trans, root, path, pending_del_slot,
3031 pending_del_nr); 3381 pending_del_nr);
3382 BUG_ON(ret);
3032 } 3383 }
3033 btrfs_free_path(path); 3384 btrfs_free_path(path);
3034 return err; 3385 return err;
@@ -3056,11 +3407,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
3056 3407
3057 if ((offset & (blocksize - 1)) == 0) 3408 if ((offset & (blocksize - 1)) == 0)
3058 goto out; 3409 goto out;
3059 ret = btrfs_check_data_free_space(root, inode, PAGE_CACHE_SIZE); 3410 ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
3060 if (ret)
3061 goto out;
3062
3063 ret = btrfs_reserve_metadata_for_delalloc(root, inode, 1);
3064 if (ret) 3411 if (ret)
3065 goto out; 3412 goto out;
3066 3413
@@ -3068,8 +3415,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
3068again: 3415again:
3069 page = grab_cache_page(mapping, index); 3416 page = grab_cache_page(mapping, index);
3070 if (!page) { 3417 if (!page) {
3071 btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); 3418 btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
3072 btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
3073 goto out; 3419 goto out;
3074 } 3420 }
3075 3421
@@ -3132,8 +3478,7 @@ again:
3132 3478
3133out_unlock: 3479out_unlock:
3134 if (ret) 3480 if (ret)
3135 btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); 3481 btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
3136 btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
3137 unlock_page(page); 3482 unlock_page(page);
3138 page_cache_release(page); 3483 page_cache_release(page);
3139out: 3484out:
@@ -3145,7 +3490,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
3145 struct btrfs_trans_handle *trans; 3490 struct btrfs_trans_handle *trans;
3146 struct btrfs_root *root = BTRFS_I(inode)->root; 3491 struct btrfs_root *root = BTRFS_I(inode)->root;
3147 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 3492 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
3148 struct extent_map *em; 3493 struct extent_map *em = NULL;
3149 struct extent_state *cached_state = NULL; 3494 struct extent_state *cached_state = NULL;
3150 u64 mask = root->sectorsize - 1; 3495 u64 mask = root->sectorsize - 1;
3151 u64 hole_start = (inode->i_size + mask) & ~mask; 3496 u64 hole_start = (inode->i_size + mask) & ~mask;
@@ -3183,11 +3528,11 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
3183 u64 hint_byte = 0; 3528 u64 hint_byte = 0;
3184 hole_size = last_byte - cur_offset; 3529 hole_size = last_byte - cur_offset;
3185 3530
3186 err = btrfs_reserve_metadata_space(root, 2); 3531 trans = btrfs_start_transaction(root, 2);
3187 if (err) 3532 if (IS_ERR(trans)) {
3533 err = PTR_ERR(trans);
3188 break; 3534 break;
3189 3535 }
3190 trans = btrfs_start_transaction(root, 1);
3191 btrfs_set_trans_block_group(trans, inode); 3536 btrfs_set_trans_block_group(trans, inode);
3192 3537
3193 err = btrfs_drop_extents(trans, inode, cur_offset, 3538 err = btrfs_drop_extents(trans, inode, cur_offset,
@@ -3205,14 +3550,15 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
3205 last_byte - 1, 0); 3550 last_byte - 1, 0);
3206 3551
3207 btrfs_end_transaction(trans, root); 3552 btrfs_end_transaction(trans, root);
3208 btrfs_unreserve_metadata_space(root, 2);
3209 } 3553 }
3210 free_extent_map(em); 3554 free_extent_map(em);
3555 em = NULL;
3211 cur_offset = last_byte; 3556 cur_offset = last_byte;
3212 if (cur_offset >= block_end) 3557 if (cur_offset >= block_end)
3213 break; 3558 break;
3214 } 3559 }
3215 3560
3561 free_extent_map(em);
3216 unlock_extent_cached(io_tree, hole_start, block_end - 1, &cached_state, 3562 unlock_extent_cached(io_tree, hole_start, block_end - 1, &cached_state,
3217 GFP_NOFS); 3563 GFP_NOFS);
3218 return err; 3564 return err;
@@ -3239,11 +3585,10 @@ static int btrfs_setattr_size(struct inode *inode, struct iattr *attr)
3239 } 3585 }
3240 } 3586 }
3241 3587
3242 ret = btrfs_reserve_metadata_space(root, 1); 3588 trans = btrfs_start_transaction(root, 5);
3243 if (ret) 3589 if (IS_ERR(trans))
3244 return ret; 3590 return PTR_ERR(trans);
3245 3591
3246 trans = btrfs_start_transaction(root, 1);
3247 btrfs_set_trans_block_group(trans, inode); 3592 btrfs_set_trans_block_group(trans, inode);
3248 3593
3249 ret = btrfs_orphan_add(trans, inode); 3594 ret = btrfs_orphan_add(trans, inode);
@@ -3251,7 +3596,6 @@ static int btrfs_setattr_size(struct inode *inode, struct iattr *attr)
3251 3596
3252 nr = trans->blocks_used; 3597 nr = trans->blocks_used;
3253 btrfs_end_transaction(trans, root); 3598 btrfs_end_transaction(trans, root);
3254 btrfs_unreserve_metadata_space(root, 1);
3255 btrfs_btree_balance_dirty(root, nr); 3599 btrfs_btree_balance_dirty(root, nr);
3256 3600
3257 if (attr->ia_size > inode->i_size) { 3601 if (attr->ia_size > inode->i_size) {
@@ -3264,8 +3608,11 @@ static int btrfs_setattr_size(struct inode *inode, struct iattr *attr)
3264 i_size_write(inode, attr->ia_size); 3608 i_size_write(inode, attr->ia_size);
3265 btrfs_ordered_update_i_size(inode, inode->i_size, NULL); 3609 btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
3266 3610
3267 trans = btrfs_start_transaction(root, 1); 3611 trans = btrfs_start_transaction(root, 0);
3612 BUG_ON(IS_ERR(trans));
3268 btrfs_set_trans_block_group(trans, inode); 3613 btrfs_set_trans_block_group(trans, inode);
3614 trans->block_rsv = root->orphan_block_rsv;
3615 BUG_ON(!trans->block_rsv);
3269 3616
3270 ret = btrfs_update_inode(trans, root, inode); 3617 ret = btrfs_update_inode(trans, root, inode);
3271 BUG_ON(ret); 3618 BUG_ON(ret);
@@ -3308,17 +3655,19 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
3308 if (err) 3655 if (err)
3309 return err; 3656 return err;
3310 } 3657 }
3311 attr->ia_valid &= ~ATTR_SIZE;
3312 3658
3313 if (attr->ia_valid) 3659 if (attr->ia_valid) {
3314 err = inode_setattr(inode, attr); 3660 setattr_copy(inode, attr);
3661 mark_inode_dirty(inode);
3662
3663 if (attr->ia_valid & ATTR_MODE)
3664 err = btrfs_acl_chmod(inode);
3665 }
3315 3666
3316 if (!err && ((attr->ia_valid & ATTR_MODE)))
3317 err = btrfs_acl_chmod(inode);
3318 return err; 3667 return err;
3319} 3668}
3320 3669
3321void btrfs_delete_inode(struct inode *inode) 3670void btrfs_evict_inode(struct inode *inode)
3322{ 3671{
3323 struct btrfs_trans_handle *trans; 3672 struct btrfs_trans_handle *trans;
3324 struct btrfs_root *root = BTRFS_I(inode)->root; 3673 struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -3326,10 +3675,14 @@ void btrfs_delete_inode(struct inode *inode)
3326 int ret; 3675 int ret;
3327 3676
3328 truncate_inode_pages(&inode->i_data, 0); 3677 truncate_inode_pages(&inode->i_data, 0);
3678 if (inode->i_nlink && btrfs_root_refs(&root->root_item) != 0)
3679 goto no_delete;
3680
3329 if (is_bad_inode(inode)) { 3681 if (is_bad_inode(inode)) {
3330 btrfs_orphan_del(NULL, inode); 3682 btrfs_orphan_del(NULL, inode);
3331 goto no_delete; 3683 goto no_delete;
3332 } 3684 }
3685 /* do we really want it for ->i_nlink > 0 and zero btrfs_root_refs? */
3333 btrfs_wait_ordered_range(inode, 0, (u64)-1); 3686 btrfs_wait_ordered_range(inode, 0, (u64)-1);
3334 3687
3335 if (root->fs_info->log_root_recovering) { 3688 if (root->fs_info->log_root_recovering) {
@@ -3345,10 +3698,21 @@ void btrfs_delete_inode(struct inode *inode)
3345 btrfs_i_size_write(inode, 0); 3698 btrfs_i_size_write(inode, 0);
3346 3699
3347 while (1) { 3700 while (1) {
3348 trans = btrfs_start_transaction(root, 1); 3701 trans = btrfs_start_transaction(root, 0);
3702 BUG_ON(IS_ERR(trans));
3349 btrfs_set_trans_block_group(trans, inode); 3703 btrfs_set_trans_block_group(trans, inode);
3350 ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0); 3704 trans->block_rsv = root->orphan_block_rsv;
3705
3706 ret = btrfs_block_rsv_check(trans, root,
3707 root->orphan_block_rsv, 0, 5);
3708 if (ret) {
3709 BUG_ON(ret != -EAGAIN);
3710 ret = btrfs_commit_transaction(trans, root);
3711 BUG_ON(ret);
3712 continue;
3713 }
3351 3714
3715 ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0);
3352 if (ret != -EAGAIN) 3716 if (ret != -EAGAIN)
3353 break; 3717 break;
3354 3718
@@ -3356,6 +3720,7 @@ void btrfs_delete_inode(struct inode *inode)
3356 btrfs_end_transaction(trans, root); 3720 btrfs_end_transaction(trans, root);
3357 trans = NULL; 3721 trans = NULL;
3358 btrfs_btree_balance_dirty(root, nr); 3722 btrfs_btree_balance_dirty(root, nr);
3723
3359 } 3724 }
3360 3725
3361 if (ret == 0) { 3726 if (ret == 0) {
@@ -3367,7 +3732,7 @@ void btrfs_delete_inode(struct inode *inode)
3367 btrfs_end_transaction(trans, root); 3732 btrfs_end_transaction(trans, root);
3368 btrfs_btree_balance_dirty(root, nr); 3733 btrfs_btree_balance_dirty(root, nr);
3369no_delete: 3734no_delete:
3370 clear_inode(inode); 3735 end_writeback(inode);
3371 return; 3736 return;
3372} 3737}
3373 3738
@@ -3498,7 +3863,7 @@ again:
3498 p = &parent->rb_right; 3863 p = &parent->rb_right;
3499 else { 3864 else {
3500 WARN_ON(!(entry->vfs_inode.i_state & 3865 WARN_ON(!(entry->vfs_inode.i_state &
3501 (I_WILL_FREE | I_FREEING | I_CLEAR))); 3866 (I_WILL_FREE | I_FREEING)));
3502 rb_erase(parent, &root->inode_tree); 3867 rb_erase(parent, &root->inode_tree);
3503 RB_CLEAR_NODE(parent); 3868 RB_CLEAR_NODE(parent);
3504 spin_unlock(&root->inode_lock); 3869 spin_unlock(&root->inode_lock);
@@ -3577,7 +3942,7 @@ again:
3577 if (atomic_read(&inode->i_count) > 1) 3942 if (atomic_read(&inode->i_count) > 1)
3578 d_prune_aliases(inode); 3943 d_prune_aliases(inode);
3579 /* 3944 /*
3580 * btrfs_drop_inode will remove it from 3945 * btrfs_drop_inode will have it removed from
3581 * the inode cache when its usage count 3946 * the inode cache when its usage count
3582 * hits zero. 3947 * hits zero.
3583 */ 3948 */
@@ -3596,40 +3961,10 @@ again:
3596 return 0; 3961 return 0;
3597} 3962}
3598 3963
3599static noinline void init_btrfs_i(struct inode *inode)
3600{
3601 struct btrfs_inode *bi = BTRFS_I(inode);
3602
3603 bi->generation = 0;
3604 bi->sequence = 0;
3605 bi->last_trans = 0;
3606 bi->last_sub_trans = 0;
3607 bi->logged_trans = 0;
3608 bi->delalloc_bytes = 0;
3609 bi->reserved_bytes = 0;
3610 bi->disk_i_size = 0;
3611 bi->flags = 0;
3612 bi->index_cnt = (u64)-1;
3613 bi->last_unlink_trans = 0;
3614 bi->ordered_data_close = 0;
3615 bi->force_compress = 0;
3616 extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS);
3617 extent_io_tree_init(&BTRFS_I(inode)->io_tree,
3618 inode->i_mapping, GFP_NOFS);
3619 extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree,
3620 inode->i_mapping, GFP_NOFS);
3621 INIT_LIST_HEAD(&BTRFS_I(inode)->delalloc_inodes);
3622 INIT_LIST_HEAD(&BTRFS_I(inode)->ordered_operations);
3623 RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node);
3624 btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree);
3625 mutex_init(&BTRFS_I(inode)->log_mutex);
3626}
3627
3628static int btrfs_init_locked_inode(struct inode *inode, void *p) 3964static int btrfs_init_locked_inode(struct inode *inode, void *p)
3629{ 3965{
3630 struct btrfs_iget_args *args = p; 3966 struct btrfs_iget_args *args = p;
3631 inode->i_ino = args->ino; 3967 inode->i_ino = args->ino;
3632 init_btrfs_i(inode);
3633 BTRFS_I(inode)->root = args->root; 3968 BTRFS_I(inode)->root = args->root;
3634 btrfs_set_inode_space_info(args->root, inode); 3969 btrfs_set_inode_space_info(args->root, inode);
3635 return 0; 3970 return 0;
@@ -3692,8 +4027,6 @@ static struct inode *new_simple_dir(struct super_block *s,
3692 if (!inode) 4027 if (!inode)
3693 return ERR_PTR(-ENOMEM); 4028 return ERR_PTR(-ENOMEM);
3694 4029
3695 init_btrfs_i(inode);
3696
3697 BTRFS_I(inode)->root = root; 4030 BTRFS_I(inode)->root = root;
3698 memcpy(&BTRFS_I(inode)->location, key, sizeof(*key)); 4031 memcpy(&BTRFS_I(inode)->location, key, sizeof(*key));
3699 BTRFS_I(inode)->dummy_inode = 1; 4032 BTRFS_I(inode)->dummy_inode = 1;
@@ -3950,7 +4283,7 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
3950 struct btrfs_trans_handle *trans; 4283 struct btrfs_trans_handle *trans;
3951 int ret = 0; 4284 int ret = 0;
3952 4285
3953 if (root->fs_info->btree_inode == inode) 4286 if (BTRFS_I(inode)->dummy_inode)
3954 return 0; 4287 return 0;
3955 4288
3956 if (wbc->sync_mode == WB_SYNC_ALL) { 4289 if (wbc->sync_mode == WB_SYNC_ALL) {
@@ -3971,10 +4304,38 @@ void btrfs_dirty_inode(struct inode *inode)
3971{ 4304{
3972 struct btrfs_root *root = BTRFS_I(inode)->root; 4305 struct btrfs_root *root = BTRFS_I(inode)->root;
3973 struct btrfs_trans_handle *trans; 4306 struct btrfs_trans_handle *trans;
4307 int ret;
4308
4309 if (BTRFS_I(inode)->dummy_inode)
4310 return;
3974 4311
3975 trans = btrfs_join_transaction(root, 1); 4312 trans = btrfs_join_transaction(root, 1);
3976 btrfs_set_trans_block_group(trans, inode); 4313 btrfs_set_trans_block_group(trans, inode);
3977 btrfs_update_inode(trans, root, inode); 4314
4315 ret = btrfs_update_inode(trans, root, inode);
4316 if (ret && ret == -ENOSPC) {
4317 /* whoops, lets try again with the full transaction */
4318 btrfs_end_transaction(trans, root);
4319 trans = btrfs_start_transaction(root, 1);
4320 if (IS_ERR(trans)) {
4321 if (printk_ratelimit()) {
4322 printk(KERN_ERR "btrfs: fail to "
4323 "dirty inode %lu error %ld\n",
4324 inode->i_ino, PTR_ERR(trans));
4325 }
4326 return;
4327 }
4328 btrfs_set_trans_block_group(trans, inode);
4329
4330 ret = btrfs_update_inode(trans, root, inode);
4331 if (ret) {
4332 if (printk_ratelimit()) {
4333 printk(KERN_ERR "btrfs: fail to "
4334 "dirty inode %lu error %d\n",
4335 inode->i_ino, ret);
4336 }
4337 }
4338 }
3978 btrfs_end_transaction(trans, root); 4339 btrfs_end_transaction(trans, root);
3979} 4340}
3980 4341
@@ -4092,7 +4453,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
4092 * btrfs_get_inode_index_count has an explanation for the magic 4453 * btrfs_get_inode_index_count has an explanation for the magic
4093 * number 4454 * number
4094 */ 4455 */
4095 init_btrfs_i(inode);
4096 BTRFS_I(inode)->index_cnt = 2; 4456 BTRFS_I(inode)->index_cnt = 2;
4097 BTRFS_I(inode)->root = root; 4457 BTRFS_I(inode)->root = root;
4098 BTRFS_I(inode)->generation = trans->transid; 4458 BTRFS_I(inode)->generation = trans->transid;
@@ -4121,16 +4481,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
4121 if (ret != 0) 4481 if (ret != 0)
4122 goto fail; 4482 goto fail;
4123 4483
4124 inode->i_uid = current_fsuid(); 4484 inode_init_owner(inode, dir, mode);
4125
4126 if (dir && (dir->i_mode & S_ISGID)) {
4127 inode->i_gid = dir->i_gid;
4128 if (S_ISDIR(mode))
4129 mode |= S_ISGID;
4130 } else
4131 inode->i_gid = current_fsgid();
4132
4133 inode->i_mode = mode;
4134 inode->i_ino = objectid; 4485 inode->i_ino = objectid;
4135 inode_set_bytes(inode, 0); 4486 inode_set_bytes(inode, 0);
4136 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 4487 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
@@ -4256,26 +4607,21 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
4256 if (!new_valid_dev(rdev)) 4607 if (!new_valid_dev(rdev))
4257 return -EINVAL; 4608 return -EINVAL;
4258 4609
4610 err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid);
4611 if (err)
4612 return err;
4613
4259 /* 4614 /*
4260 * 2 for inode item and ref 4615 * 2 for inode item and ref
4261 * 2 for dir items 4616 * 2 for dir items
4262 * 1 for xattr if selinux is on 4617 * 1 for xattr if selinux is on
4263 */ 4618 */
4264 err = btrfs_reserve_metadata_space(root, 5); 4619 trans = btrfs_start_transaction(root, 5);
4265 if (err) 4620 if (IS_ERR(trans))
4266 return err; 4621 return PTR_ERR(trans);
4267 4622
4268 trans = btrfs_start_transaction(root, 1);
4269 if (!trans)
4270 goto fail;
4271 btrfs_set_trans_block_group(trans, dir); 4623 btrfs_set_trans_block_group(trans, dir);
4272 4624
4273 err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
4274 if (err) {
4275 err = -ENOSPC;
4276 goto out_unlock;
4277 }
4278
4279 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, 4625 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
4280 dentry->d_name.len, 4626 dentry->d_name.len,
4281 dentry->d_parent->d_inode->i_ino, objectid, 4627 dentry->d_parent->d_inode->i_ino, objectid,
@@ -4304,13 +4650,11 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
4304out_unlock: 4650out_unlock:
4305 nr = trans->blocks_used; 4651 nr = trans->blocks_used;
4306 btrfs_end_transaction_throttle(trans, root); 4652 btrfs_end_transaction_throttle(trans, root);
4307fail: 4653 btrfs_btree_balance_dirty(root, nr);
4308 btrfs_unreserve_metadata_space(root, 5);
4309 if (drop_inode) { 4654 if (drop_inode) {
4310 inode_dec_link_count(inode); 4655 inode_dec_link_count(inode);
4311 iput(inode); 4656 iput(inode);
4312 } 4657 }
4313 btrfs_btree_balance_dirty(root, nr);
4314 return err; 4658 return err;
4315} 4659}
4316 4660
@@ -4320,32 +4664,26 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
4320 struct btrfs_trans_handle *trans; 4664 struct btrfs_trans_handle *trans;
4321 struct btrfs_root *root = BTRFS_I(dir)->root; 4665 struct btrfs_root *root = BTRFS_I(dir)->root;
4322 struct inode *inode = NULL; 4666 struct inode *inode = NULL;
4323 int err;
4324 int drop_inode = 0; 4667 int drop_inode = 0;
4668 int err;
4325 unsigned long nr = 0; 4669 unsigned long nr = 0;
4326 u64 objectid; 4670 u64 objectid;
4327 u64 index = 0; 4671 u64 index = 0;
4328 4672
4673 err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid);
4674 if (err)
4675 return err;
4329 /* 4676 /*
4330 * 2 for inode item and ref 4677 * 2 for inode item and ref
4331 * 2 for dir items 4678 * 2 for dir items
4332 * 1 for xattr if selinux is on 4679 * 1 for xattr if selinux is on
4333 */ 4680 */
4334 err = btrfs_reserve_metadata_space(root, 5); 4681 trans = btrfs_start_transaction(root, 5);
4335 if (err) 4682 if (IS_ERR(trans))
4336 return err; 4683 return PTR_ERR(trans);
4337 4684
4338 trans = btrfs_start_transaction(root, 1);
4339 if (!trans)
4340 goto fail;
4341 btrfs_set_trans_block_group(trans, dir); 4685 btrfs_set_trans_block_group(trans, dir);
4342 4686
4343 err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
4344 if (err) {
4345 err = -ENOSPC;
4346 goto out_unlock;
4347 }
4348
4349 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, 4687 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
4350 dentry->d_name.len, 4688 dentry->d_name.len,
4351 dentry->d_parent->d_inode->i_ino, 4689 dentry->d_parent->d_inode->i_ino,
@@ -4377,8 +4715,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
4377out_unlock: 4715out_unlock:
4378 nr = trans->blocks_used; 4716 nr = trans->blocks_used;
4379 btrfs_end_transaction_throttle(trans, root); 4717 btrfs_end_transaction_throttle(trans, root);
4380fail:
4381 btrfs_unreserve_metadata_space(root, 5);
4382 if (drop_inode) { 4718 if (drop_inode) {
4383 inode_dec_link_count(inode); 4719 inode_dec_link_count(inode);
4384 iput(inode); 4720 iput(inode);
@@ -4405,21 +4741,21 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
4405 if (root->objectid != BTRFS_I(inode)->root->objectid) 4741 if (root->objectid != BTRFS_I(inode)->root->objectid)
4406 return -EPERM; 4742 return -EPERM;
4407 4743
4408 /*
4409 * 1 item for inode ref
4410 * 2 items for dir items
4411 */
4412 err = btrfs_reserve_metadata_space(root, 3);
4413 if (err)
4414 return err;
4415
4416 btrfs_inc_nlink(inode); 4744 btrfs_inc_nlink(inode);
4417 4745
4418 err = btrfs_set_inode_index(dir, &index); 4746 err = btrfs_set_inode_index(dir, &index);
4419 if (err) 4747 if (err)
4420 goto fail; 4748 goto fail;
4421 4749
4422 trans = btrfs_start_transaction(root, 1); 4750 /*
4751 * 1 item for inode ref
4752 * 2 items for dir items
4753 */
4754 trans = btrfs_start_transaction(root, 3);
4755 if (IS_ERR(trans)) {
4756 err = PTR_ERR(trans);
4757 goto fail;
4758 }
4423 4759
4424 btrfs_set_trans_block_group(trans, dir); 4760 btrfs_set_trans_block_group(trans, dir);
4425 atomic_inc(&inode->i_count); 4761 atomic_inc(&inode->i_count);
@@ -4438,7 +4774,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
4438 nr = trans->blocks_used; 4774 nr = trans->blocks_used;
4439 btrfs_end_transaction_throttle(trans, root); 4775 btrfs_end_transaction_throttle(trans, root);
4440fail: 4776fail:
4441 btrfs_unreserve_metadata_space(root, 3);
4442 if (drop_inode) { 4777 if (drop_inode) {
4443 inode_dec_link_count(inode); 4778 inode_dec_link_count(inode);
4444 iput(inode); 4779 iput(inode);
@@ -4458,28 +4793,20 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
4458 u64 index = 0; 4793 u64 index = 0;
4459 unsigned long nr = 1; 4794 unsigned long nr = 1;
4460 4795
4796 err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid);
4797 if (err)
4798 return err;
4799
4461 /* 4800 /*
4462 * 2 items for inode and ref 4801 * 2 items for inode and ref
4463 * 2 items for dir items 4802 * 2 items for dir items
4464 * 1 for xattr if selinux is on 4803 * 1 for xattr if selinux is on
4465 */ 4804 */
4466 err = btrfs_reserve_metadata_space(root, 5); 4805 trans = btrfs_start_transaction(root, 5);
4467 if (err) 4806 if (IS_ERR(trans))
4468 return err; 4807 return PTR_ERR(trans);
4469
4470 trans = btrfs_start_transaction(root, 1);
4471 if (!trans) {
4472 err = -ENOMEM;
4473 goto out_unlock;
4474 }
4475 btrfs_set_trans_block_group(trans, dir); 4808 btrfs_set_trans_block_group(trans, dir);
4476 4809
4477 err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
4478 if (err) {
4479 err = -ENOSPC;
4480 goto out_fail;
4481 }
4482
4483 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, 4810 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
4484 dentry->d_name.len, 4811 dentry->d_name.len,
4485 dentry->d_parent->d_inode->i_ino, objectid, 4812 dentry->d_parent->d_inode->i_ino, objectid,
@@ -4519,9 +4846,6 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
4519out_fail: 4846out_fail:
4520 nr = trans->blocks_used; 4847 nr = trans->blocks_used;
4521 btrfs_end_transaction_throttle(trans, root); 4848 btrfs_end_transaction_throttle(trans, root);
4522
4523out_unlock:
4524 btrfs_unreserve_metadata_space(root, 5);
4525 if (drop_on_err) 4849 if (drop_on_err)
4526 iput(inode); 4850 iput(inode);
4527 btrfs_btree_balance_dirty(root, nr); 4851 btrfs_btree_balance_dirty(root, nr);
@@ -4779,6 +5103,7 @@ again:
4779 } 5103 }
4780 flush_dcache_page(page); 5104 flush_dcache_page(page);
4781 } else if (create && PageUptodate(page)) { 5105 } else if (create && PageUptodate(page)) {
5106 WARN_ON(1);
4782 if (!trans) { 5107 if (!trans) {
4783 kunmap(page); 5108 kunmap(page);
4784 free_extent_map(em); 5109 free_extent_map(em);
@@ -4875,11 +5200,651 @@ out:
4875 return em; 5200 return em;
4876} 5201}
4877 5202
5203static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
5204 u64 start, u64 len)
5205{
5206 struct btrfs_root *root = BTRFS_I(inode)->root;
5207 struct btrfs_trans_handle *trans;
5208 struct extent_map *em;
5209 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
5210 struct btrfs_key ins;
5211 u64 alloc_hint;
5212 int ret;
5213
5214 btrfs_drop_extent_cache(inode, start, start + len - 1, 0);
5215
5216 trans = btrfs_join_transaction(root, 0);
5217 if (!trans)
5218 return ERR_PTR(-ENOMEM);
5219
5220 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
5221
5222 alloc_hint = get_extent_allocation_hint(inode, start, len);
5223 ret = btrfs_reserve_extent(trans, root, len, root->sectorsize, 0,
5224 alloc_hint, (u64)-1, &ins, 1);
5225 if (ret) {
5226 em = ERR_PTR(ret);
5227 goto out;
5228 }
5229
5230 em = alloc_extent_map(GFP_NOFS);
5231 if (!em) {
5232 em = ERR_PTR(-ENOMEM);
5233 goto out;
5234 }
5235
5236 em->start = start;
5237 em->orig_start = em->start;
5238 em->len = ins.offset;
5239
5240 em->block_start = ins.objectid;
5241 em->block_len = ins.offset;
5242 em->bdev = root->fs_info->fs_devices->latest_bdev;
5243 set_bit(EXTENT_FLAG_PINNED, &em->flags);
5244
5245 while (1) {
5246 write_lock(&em_tree->lock);
5247 ret = add_extent_mapping(em_tree, em);
5248 write_unlock(&em_tree->lock);
5249 if (ret != -EEXIST)
5250 break;
5251 btrfs_drop_extent_cache(inode, start, start + em->len - 1, 0);
5252 }
5253
5254 ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid,
5255 ins.offset, ins.offset, 0);
5256 if (ret) {
5257 btrfs_free_reserved_extent(root, ins.objectid, ins.offset);
5258 em = ERR_PTR(ret);
5259 }
5260out:
5261 btrfs_end_transaction(trans, root);
5262 return em;
5263}
5264
/*
 * Decide whether a direct IO write to [offset, offset + len) of @inode may
 * overwrite the existing file extent in place.
 *
 * returns 1 when the nocow is safe, < 0 on error, 0 if the
 * block must be cow'd
 */
static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans,
				      struct inode *inode, u64 offset, u64 len)
{
	struct btrfs_path *path;
	int ret;
	struct extent_buffer *leaf;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_file_extent_item *fi;
	struct btrfs_key key;
	u64 disk_bytenr;
	u64 backref_offset;
	u64 extent_end;
	u64 num_bytes;
	int slot;
	int found_type;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino,
				       offset, 0);
	if (ret < 0)
		goto out;

	slot = path->slots[0];
	if (ret == 1) {
		/*
		 * no item at exactly this offset; look at the item in the
		 * previous slot instead, if there is one
		 */
		if (slot == 0) {
			/* can't find the item, must cow */
			ret = 0;
			goto out;
		}
		slot--;
	}
	/* from here on every early exit means "must cow" */
	ret = 0;
	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, slot);
	if (key.objectid != inode->i_ino ||
	    key.type != BTRFS_EXTENT_DATA_KEY) {
		/* not our file or wrong item type, must cow */
		goto out;
	}

	if (key.offset > offset) {
		/* Wrong offset, must cow */
		goto out;
	}

	fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
	found_type = btrfs_file_extent_type(leaf, fi);
	if (found_type != BTRFS_FILE_EXTENT_REG &&
	    found_type != BTRFS_FILE_EXTENT_PREALLOC) {
		/* not a regular extent, must cow */
		goto out;
	}
	disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
	backref_offset = btrfs_file_extent_offset(leaf, fi);

	extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
	if (extent_end < offset + len) {
		/* extent doesn't include our full range, must cow */
		goto out;
	}

	/* the extent lives somewhere we may not write to, must cow */
	if (btrfs_extent_readonly(root, disk_bytenr))
		goto out;

	/*
	 * look for other files referencing this extent, if we
	 * find any we must cow
	 */
	if (btrfs_cross_ref_exist(trans, root, inode->i_ino,
				  key.offset - backref_offset, disk_bytenr))
		goto out;

	/*
	 * adjust disk_bytenr and num_bytes to cover just the bytes
	 * in this extent we are about to write.  If there
	 * are any csums in that range we have to cow in order
	 * to keep the csums correct
	 */
	disk_bytenr += backref_offset;
	disk_bytenr += offset - key.offset;
	num_bytes = min(offset + len, extent_end) - offset;
	if (csum_exist_in_range(root, disk_bytenr, num_bytes))
		goto out;
	/*
	 * all of the above have passed, it is safe to overwrite this extent
	 * without cow
	 */
	ret = 1;
out:
	btrfs_free_path(path);
	return ret;
}
5364
/*
 * get_block callback passed to __blockdev_direct_IO() by btrfs_direct_IO().
 *
 * @inode:     inode the IO is against
 * @iblock:    block index in the file; shifted by i_blkbits to get the
 *             file offset
 * @bh_result: b_size holds the requested length on entry; on success the
 *             block number, size and bdev of the mapping are filled in
 * @create:    non-zero for writes
 *
 * Returns 0 on success (for a read of a hole or prealloc extent the buffer
 * is left unmapped), -ENOTBLK for inline or compressed extents so the
 * caller falls back to buffered IO, or another negative errno.
 */
static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
				   struct buffer_head *bh_result, int create)
{
	struct extent_map *em;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	u64 start = iblock << inode->i_blkbits;
	u64 len = bh_result->b_size;
	struct btrfs_trans_handle *trans;

	em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
	if (IS_ERR(em))
		return PTR_ERR(em);

	/*
	 * Ok for INLINE and COMPRESSED extents we need to fallback on buffered
	 * io.  INLINE is special, and we could probably kludge it in here, but
	 * it's still buffered so for safety lets just fall back to the generic
	 * buffered path.
	 *
	 * For COMPRESSED we _have_ to read the entire extent in so we can
	 * decompress it, so there will be buffering required no matter what we
	 * do, so go ahead and fallback to buffered.
	 *
	 * We return -ENOTBLK because thats what makes DIO go ahead and go back
	 * to buffered IO.  Don't blame me, this is the price we pay for using
	 * the generic code.
	 */
	if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) ||
	    em->block_start == EXTENT_MAP_INLINE) {
		free_extent_map(em);
		return -ENOTBLK;
	}

	/* Just a good old fashioned hole, return */
	if (!create && (em->block_start == EXTENT_MAP_HOLE ||
			test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
		free_extent_map(em);
		/* DIO will do one hole at a time, so just unlock a sector */
		unlock_extent(&BTRFS_I(inode)->io_tree, start,
			      start + root->sectorsize - 1, GFP_NOFS);
		return 0;
	}

	/*
	 * Reads of a real extent just map whatever is left of the extent
	 * from @start on; the range stays locked.
	 *
	 * NOTE(review): len is not clamped to bh_result->b_size here, unlike
	 * the write paths below -- confirm the DIO core copes with a mapping
	 * larger than it asked for.
	 */
	if (!create) {
		len = em->len - (start - em->start);
		goto map;
	}

	/*
	 * We don't allocate a new extent in the following cases
	 *
	 * 1) The inode is marked as NODATACOW.  In this case we'll just use the
	 * existing extent.
	 * 2) The extent is marked as PREALLOC.  We're good to go here and can
	 * just use the extent.
	 *
	 */
	if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) ||
	    ((BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) &&
	     em->block_start != EXTENT_MAP_HOLE)) {
		int type;
		int ret;
		u64 block_start;

		if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
			type = BTRFS_ORDERED_PREALLOC;
		else
			type = BTRFS_ORDERED_NOCOW;
		len = min(len, em->len - (start - em->start));
		block_start = em->block_start + (start - em->start);

		/*
		 * we're not going to log anything, but we do need
		 * to make sure the current transaction stays open
		 * while we look for nocow cross refs
		 */
		trans = btrfs_join_transaction(root, 0);
		if (!trans)
			goto must_cow;

		if (can_nocow_odirect(trans, inode, start, len) == 1) {
			/* write in place: only an ordered extent is needed */
			ret = btrfs_add_ordered_extent_dio(inode, start,
					   block_start, len, len, type);
			btrfs_end_transaction(trans, root);
			if (ret) {
				free_extent_map(em);
				return ret;
			}
			goto unlock;
		}
		btrfs_end_transaction(trans, root);
	}
must_cow:
	/*
	 * this will cow the extent, reset the len in case we changed
	 * it above
	 */
	len = bh_result->b_size;
	free_extent_map(em);
	em = btrfs_new_extent_direct(inode, start, len);
	if (IS_ERR(em))
		return PTR_ERR(em);
	len = min(len, em->len - (start - em->start));
unlock:
	/* write paths: drop the lock and delalloc bits for the mapped range */
	clear_extent_bit(&BTRFS_I(inode)->io_tree, start, start + len - 1,
			  EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DIRTY, 1,
			  0, NULL, GFP_NOFS);
map:
	bh_result->b_blocknr = (em->block_start + (start - em->start)) >>
		inode->i_blkbits;
	bh_result->b_size = len;
	bh_result->b_bdev = em->bdev;
	set_buffer_mapped(bh_result);
	/* writes into anything but a prealloc extent are flagged as new */
	if (create && !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
		set_buffer_new(bh_result);

	free_extent_map(em);

	return 0;
}
5485
/*
 * Per-bio state for direct IO.  It is allocated in btrfs_submit_direct(),
 * hung off bio->bi_private and freed again by the read/write end_io
 * handlers.
 */
struct btrfs_dio_private {
	/* inode the direct IO is against */
	struct inode *inode;
	/* file offset the bio starts at */
	u64 logical_offset;
	/* bio->bi_sector converted to a byte offset (<< 9) */
	u64 disk_bytenr;
	/* sum of bv_len over all bio_vecs of the bio */
	u64 bytes;
	/* one u32 per bio_vec; NULL when the inode is NODATASUM */
	u32 *csums;
	/* original bio->bi_private, restored before dio_end_io() is called */
	void *private;
};
5494
5495static void btrfs_endio_direct_read(struct bio *bio, int err)
5496{
5497 struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1;
5498 struct bio_vec *bvec = bio->bi_io_vec;
5499 struct btrfs_dio_private *dip = bio->bi_private;
5500 struct inode *inode = dip->inode;
5501 struct btrfs_root *root = BTRFS_I(inode)->root;
5502 u64 start;
5503 u32 *private = dip->csums;
5504
5505 start = dip->logical_offset;
5506 do {
5507 if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
5508 struct page *page = bvec->bv_page;
5509 char *kaddr;
5510 u32 csum = ~(u32)0;
5511 unsigned long flags;
5512
5513 local_irq_save(flags);
5514 kaddr = kmap_atomic(page, KM_IRQ0);
5515 csum = btrfs_csum_data(root, kaddr + bvec->bv_offset,
5516 csum, bvec->bv_len);
5517 btrfs_csum_final(csum, (char *)&csum);
5518 kunmap_atomic(kaddr, KM_IRQ0);
5519 local_irq_restore(flags);
5520
5521 flush_dcache_page(bvec->bv_page);
5522 if (csum != *private) {
5523 printk(KERN_ERR "btrfs csum failed ino %lu off"
5524 " %llu csum %u private %u\n",
5525 inode->i_ino, (unsigned long long)start,
5526 csum, *private);
5527 err = -EIO;
5528 }
5529 }
5530
5531 start += bvec->bv_len;
5532 private++;
5533 bvec++;
5534 } while (bvec <= bvec_end);
5535
5536 unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset,
5537 dip->logical_offset + dip->bytes - 1, GFP_NOFS);
5538 bio->bi_private = dip->private;
5539
5540 kfree(dip->csums);
5541 kfree(dip);
5542 dio_end_io(bio, err);
5543}
5544
5545static void btrfs_endio_direct_write(struct bio *bio, int err)
5546{
5547 struct btrfs_dio_private *dip = bio->bi_private;
5548 struct inode *inode = dip->inode;
5549 struct btrfs_root *root = BTRFS_I(inode)->root;
5550 struct btrfs_trans_handle *trans;
5551 struct btrfs_ordered_extent *ordered = NULL;
5552 struct extent_state *cached_state = NULL;
5553 int ret;
5554
5555 if (err)
5556 goto out_done;
5557
5558 ret = btrfs_dec_test_ordered_pending(inode, &ordered,
5559 dip->logical_offset, dip->bytes);
5560 if (!ret)
5561 goto out_done;
5562
5563 BUG_ON(!ordered);
5564
5565 trans = btrfs_join_transaction(root, 1);
5566 if (!trans) {
5567 err = -ENOMEM;
5568 goto out;
5569 }
5570 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
5571
5572 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) {
5573 ret = btrfs_ordered_update_i_size(inode, 0, ordered);
5574 if (!ret)
5575 ret = btrfs_update_inode(trans, root, inode);
5576 err = ret;
5577 goto out;
5578 }
5579
5580 lock_extent_bits(&BTRFS_I(inode)->io_tree, ordered->file_offset,
5581 ordered->file_offset + ordered->len - 1, 0,
5582 &cached_state, GFP_NOFS);
5583
5584 if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags)) {
5585 ret = btrfs_mark_extent_written(trans, inode,
5586 ordered->file_offset,
5587 ordered->file_offset +
5588 ordered->len);
5589 if (ret) {
5590 err = ret;
5591 goto out_unlock;
5592 }
5593 } else {
5594 ret = insert_reserved_file_extent(trans, inode,
5595 ordered->file_offset,
5596 ordered->start,
5597 ordered->disk_len,
5598 ordered->len,
5599 ordered->len,
5600 0, 0, 0,
5601 BTRFS_FILE_EXTENT_REG);
5602 unpin_extent_cache(&BTRFS_I(inode)->extent_tree,
5603 ordered->file_offset, ordered->len);
5604 if (ret) {
5605 err = ret;
5606 WARN_ON(1);
5607 goto out_unlock;
5608 }
5609 }
5610
5611 add_pending_csums(trans, inode, ordered->file_offset, &ordered->list);
5612 btrfs_ordered_update_i_size(inode, 0, ordered);
5613 btrfs_update_inode(trans, root, inode);
5614out_unlock:
5615 unlock_extent_cached(&BTRFS_I(inode)->io_tree, ordered->file_offset,
5616 ordered->file_offset + ordered->len - 1,
5617 &cached_state, GFP_NOFS);
5618out:
5619 btrfs_delalloc_release_metadata(inode, ordered->len);
5620 btrfs_end_transaction(trans, root);
5621 btrfs_put_ordered_extent(ordered);
5622 btrfs_put_ordered_extent(ordered);
5623out_done:
5624 bio->bi_private = dip->private;
5625
5626 kfree(dip->csums);
5627 kfree(dip);
5628 dio_end_io(bio, err);
5629}
5630
5631static int __btrfs_submit_bio_start_direct_io(struct inode *inode, int rw,
5632 struct bio *bio, int mirror_num,
5633 unsigned long bio_flags, u64 offset)
5634{
5635 int ret;
5636 struct btrfs_root *root = BTRFS_I(inode)->root;
5637 ret = btrfs_csum_one_bio(root, inode, bio, offset, 1);
5638 BUG_ON(ret);
5639 return 0;
5640}
5641
5642static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
5643 loff_t file_offset)
5644{
5645 struct btrfs_root *root = BTRFS_I(inode)->root;
5646 struct btrfs_dio_private *dip;
5647 struct bio_vec *bvec = bio->bi_io_vec;
5648 u64 start;
5649 int skip_sum;
5650 int write = rw & REQ_WRITE;
5651 int ret = 0;
5652
5653 skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
5654
5655 dip = kmalloc(sizeof(*dip), GFP_NOFS);
5656 if (!dip) {
5657 ret = -ENOMEM;
5658 goto free_ordered;
5659 }
5660 dip->csums = NULL;
5661
5662 if (!skip_sum) {
5663 dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS);
5664 if (!dip->csums) {
5665 ret = -ENOMEM;
5666 goto free_ordered;
5667 }
5668 }
5669
5670 dip->private = bio->bi_private;
5671 dip->inode = inode;
5672 dip->logical_offset = file_offset;
5673
5674 start = dip->logical_offset;
5675 dip->bytes = 0;
5676 do {
5677 dip->bytes += bvec->bv_len;
5678 bvec++;
5679 } while (bvec <= (bio->bi_io_vec + bio->bi_vcnt - 1));
5680
5681 dip->disk_bytenr = (u64)bio->bi_sector << 9;
5682 bio->bi_private = dip;
5683
5684 if (write)
5685 bio->bi_end_io = btrfs_endio_direct_write;
5686 else
5687 bio->bi_end_io = btrfs_endio_direct_read;
5688
5689 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
5690 if (ret)
5691 goto out_err;
5692
5693 if (write && !skip_sum) {
5694 ret = btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
5695 inode, rw, bio, 0, 0,
5696 dip->logical_offset,
5697 __btrfs_submit_bio_start_direct_io,
5698 __btrfs_submit_bio_done);
5699 if (ret)
5700 goto out_err;
5701 return;
5702 } else if (!skip_sum)
5703 btrfs_lookup_bio_sums_dio(root, inode, bio,
5704 dip->logical_offset, dip->csums);
5705
5706 ret = btrfs_map_bio(root, rw, bio, 0, 1);
5707 if (ret)
5708 goto out_err;
5709 return;
5710out_err:
5711 kfree(dip->csums);
5712 kfree(dip);
5713free_ordered:
5714 /*
5715 * If this is a write, we need to clean up the reserved space and kill
5716 * the ordered extent.
5717 */
5718 if (write) {
5719 struct btrfs_ordered_extent *ordered;
5720 ordered = btrfs_lookup_ordered_extent(inode,
5721 dip->logical_offset);
5722 if (!test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags) &&
5723 !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags))
5724 btrfs_free_reserved_extent(root, ordered->start,
5725 ordered->disk_len);
5726 btrfs_put_ordered_extent(ordered);
5727 btrfs_put_ordered_extent(ordered);
5728 }
5729 bio_endio(bio, ret);
5730}
5731
5732static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *iocb,
5733 const struct iovec *iov, loff_t offset,
5734 unsigned long nr_segs)
5735{
5736 int seg;
5737 size_t size;
5738 unsigned long addr;
5739 unsigned blocksize_mask = root->sectorsize - 1;
5740 ssize_t retval = -EINVAL;
5741 loff_t end = offset;
5742
5743 if (offset & blocksize_mask)
5744 goto out;
5745
5746 /* Check the memory alignment. Blocks cannot straddle pages */
5747 for (seg = 0; seg < nr_segs; seg++) {
5748 addr = (unsigned long)iov[seg].iov_base;
5749 size = iov[seg].iov_len;
5750 end += size;
5751 if ((addr & blocksize_mask) || (size & blocksize_mask))
5752 goto out;
5753 }
5754 retval = 0;
5755out:
5756 return retval;
5757}
4878static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, 5758static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
4879 const struct iovec *iov, loff_t offset, 5759 const struct iovec *iov, loff_t offset,
4880 unsigned long nr_segs) 5760 unsigned long nr_segs)
4881{ 5761{
4882 return -EINVAL; 5762 struct file *file = iocb->ki_filp;
5763 struct inode *inode = file->f_mapping->host;
5764 struct btrfs_ordered_extent *ordered;
5765 struct extent_state *cached_state = NULL;
5766 u64 lockstart, lockend;
5767 ssize_t ret;
5768 int writing = rw & WRITE;
5769 int write_bits = 0;
5770 size_t count = iov_length(iov, nr_segs);
5771
5772 if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iov,
5773 offset, nr_segs)) {
5774 return 0;
5775 }
5776
5777 lockstart = offset;
5778 lockend = offset + count - 1;
5779
5780 if (writing) {
5781 ret = btrfs_delalloc_reserve_space(inode, count);
5782 if (ret)
5783 goto out;
5784 }
5785
5786 while (1) {
5787 lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
5788 0, &cached_state, GFP_NOFS);
5789 /*
5790 * We're concerned with the entire range that we're going to be
5791 * doing DIO to, so we need to make sure theres no ordered
5792 * extents in this range.
5793 */
5794 ordered = btrfs_lookup_ordered_range(inode, lockstart,
5795 lockend - lockstart + 1);
5796 if (!ordered)
5797 break;
5798 unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
5799 &cached_state, GFP_NOFS);
5800 btrfs_start_ordered_extent(inode, ordered, 1);
5801 btrfs_put_ordered_extent(ordered);
5802 cond_resched();
5803 }
5804
5805 /*
5806 * we don't use btrfs_set_extent_delalloc because we don't want
5807 * the dirty or uptodate bits
5808 */
5809 if (writing) {
5810 write_bits = EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING;
5811 ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
5812 EXTENT_DELALLOC, 0, NULL, &cached_state,
5813 GFP_NOFS);
5814 if (ret) {
5815 clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
5816 lockend, EXTENT_LOCKED | write_bits,
5817 1, 0, &cached_state, GFP_NOFS);
5818 goto out;
5819 }
5820 }
5821
5822 free_extent_state(cached_state);
5823 cached_state = NULL;
5824
5825 ret = __blockdev_direct_IO(rw, iocb, inode,
5826 BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev,
5827 iov, offset, nr_segs, btrfs_get_blocks_direct, NULL,
5828 btrfs_submit_direct, 0);
5829
5830 if (ret < 0 && ret != -EIOCBQUEUED) {
5831 clear_extent_bit(&BTRFS_I(inode)->io_tree, offset,
5832 offset + iov_length(iov, nr_segs) - 1,
5833 EXTENT_LOCKED | write_bits, 1, 0,
5834 &cached_state, GFP_NOFS);
5835 } else if (ret >= 0 && ret < iov_length(iov, nr_segs)) {
5836 /*
5837 * We're falling back to buffered, unlock the section we didn't
5838 * do IO on.
5839 */
5840 clear_extent_bit(&BTRFS_I(inode)->io_tree, offset + ret,
5841 offset + iov_length(iov, nr_segs) - 1,
5842 EXTENT_LOCKED | write_bits, 1, 0,
5843 &cached_state, GFP_NOFS);
5844 }
5845out:
5846 free_extent_state(cached_state);
5847 return ret;
4883} 5848}
4884 5849
4885static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 5850static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
@@ -5043,7 +6008,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
5043 u64 page_start; 6008 u64 page_start;
5044 u64 page_end; 6009 u64 page_end;
5045 6010
5046 ret = btrfs_check_data_free_space(root, inode, PAGE_CACHE_SIZE); 6011 ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
5047 if (ret) { 6012 if (ret) {
5048 if (ret == -ENOMEM) 6013 if (ret == -ENOMEM)
5049 ret = VM_FAULT_OOM; 6014 ret = VM_FAULT_OOM;
@@ -5052,13 +6017,6 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
5052 goto out; 6017 goto out;
5053 } 6018 }
5054 6019
5055 ret = btrfs_reserve_metadata_for_delalloc(root, inode, 1);
5056 if (ret) {
5057 btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE);
5058 ret = VM_FAULT_SIGBUS;
5059 goto out;
5060 }
5061
5062 ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */ 6020 ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */
5063again: 6021again:
5064 lock_page(page); 6022 lock_page(page);
@@ -5068,7 +6026,6 @@ again:
5068 6026
5069 if ((page->mapping != inode->i_mapping) || 6027 if ((page->mapping != inode->i_mapping) ||
5070 (page_start >= size)) { 6028 (page_start >= size)) {
5071 btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE);
5072 /* page got truncated out from underneath us */ 6029 /* page got truncated out from underneath us */
5073 goto out_unlock; 6030 goto out_unlock;
5074 } 6031 }
@@ -5109,7 +6066,6 @@ again:
5109 unlock_extent_cached(io_tree, page_start, page_end, 6066 unlock_extent_cached(io_tree, page_start, page_end,
5110 &cached_state, GFP_NOFS); 6067 &cached_state, GFP_NOFS);
5111 ret = VM_FAULT_SIGBUS; 6068 ret = VM_FAULT_SIGBUS;
5112 btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE);
5113 goto out_unlock; 6069 goto out_unlock;
5114 } 6070 }
5115 ret = 0; 6071 ret = 0;
@@ -5136,10 +6092,10 @@ again:
5136 unlock_extent_cached(io_tree, page_start, page_end, &cached_state, GFP_NOFS); 6092 unlock_extent_cached(io_tree, page_start, page_end, &cached_state, GFP_NOFS);
5137 6093
5138out_unlock: 6094out_unlock:
5139 btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
5140 if (!ret) 6095 if (!ret)
5141 return VM_FAULT_LOCKED; 6096 return VM_FAULT_LOCKED;
5142 unlock_page(page); 6097 unlock_page(page);
6098 btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
5143out: 6099out:
5144 return ret; 6100 return ret;
5145} 6101}
@@ -5164,8 +6120,10 @@ static void btrfs_truncate(struct inode *inode)
5164 btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); 6120 btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1);
5165 btrfs_ordered_update_i_size(inode, inode->i_size, NULL); 6121 btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
5166 6122
5167 trans = btrfs_start_transaction(root, 1); 6123 trans = btrfs_start_transaction(root, 0);
6124 BUG_ON(IS_ERR(trans));
5168 btrfs_set_trans_block_group(trans, inode); 6125 btrfs_set_trans_block_group(trans, inode);
6126 trans->block_rsv = root->orphan_block_rsv;
5169 6127
5170 /* 6128 /*
5171 * setattr is responsible for setting the ordered_data_close flag, 6129 * setattr is responsible for setting the ordered_data_close flag,
@@ -5188,6 +6146,23 @@ static void btrfs_truncate(struct inode *inode)
5188 btrfs_add_ordered_operation(trans, root, inode); 6146 btrfs_add_ordered_operation(trans, root, inode);
5189 6147
5190 while (1) { 6148 while (1) {
6149 if (!trans) {
6150 trans = btrfs_start_transaction(root, 0);
6151 BUG_ON(IS_ERR(trans));
6152 btrfs_set_trans_block_group(trans, inode);
6153 trans->block_rsv = root->orphan_block_rsv;
6154 }
6155
6156 ret = btrfs_block_rsv_check(trans, root,
6157 root->orphan_block_rsv, 0, 5);
6158 if (ret) {
6159 BUG_ON(ret != -EAGAIN);
6160 ret = btrfs_commit_transaction(trans, root);
6161 BUG_ON(ret);
6162 trans = NULL;
6163 continue;
6164 }
6165
5191 ret = btrfs_truncate_inode_items(trans, root, inode, 6166 ret = btrfs_truncate_inode_items(trans, root, inode,
5192 inode->i_size, 6167 inode->i_size,
5193 BTRFS_EXTENT_DATA_KEY); 6168 BTRFS_EXTENT_DATA_KEY);
@@ -5199,10 +6174,8 @@ static void btrfs_truncate(struct inode *inode)
5199 6174
5200 nr = trans->blocks_used; 6175 nr = trans->blocks_used;
5201 btrfs_end_transaction(trans, root); 6176 btrfs_end_transaction(trans, root);
6177 trans = NULL;
5202 btrfs_btree_balance_dirty(root, nr); 6178 btrfs_btree_balance_dirty(root, nr);
5203
5204 trans = btrfs_start_transaction(root, 1);
5205 btrfs_set_trans_block_group(trans, inode);
5206 } 6179 }
5207 6180
5208 if (ret == 0 && inode->i_nlink > 0) { 6181 if (ret == 0 && inode->i_nlink > 0) {
@@ -5263,21 +6236,47 @@ unsigned long btrfs_force_ra(struct address_space *mapping,
5263struct inode *btrfs_alloc_inode(struct super_block *sb) 6236struct inode *btrfs_alloc_inode(struct super_block *sb)
5264{ 6237{
5265 struct btrfs_inode *ei; 6238 struct btrfs_inode *ei;
6239 struct inode *inode;
5266 6240
5267 ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS); 6241 ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS);
5268 if (!ei) 6242 if (!ei)
5269 return NULL; 6243 return NULL;
6244
6245 ei->root = NULL;
6246 ei->space_info = NULL;
6247 ei->generation = 0;
6248 ei->sequence = 0;
5270 ei->last_trans = 0; 6249 ei->last_trans = 0;
5271 ei->last_sub_trans = 0; 6250 ei->last_sub_trans = 0;
5272 ei->logged_trans = 0; 6251 ei->logged_trans = 0;
5273 ei->outstanding_extents = 0; 6252 ei->delalloc_bytes = 0;
5274 ei->reserved_extents = 0; 6253 ei->reserved_bytes = 0;
5275 ei->root = NULL; 6254 ei->disk_i_size = 0;
6255 ei->flags = 0;
6256 ei->index_cnt = (u64)-1;
6257 ei->last_unlink_trans = 0;
6258
5276 spin_lock_init(&ei->accounting_lock); 6259 spin_lock_init(&ei->accounting_lock);
6260 atomic_set(&ei->outstanding_extents, 0);
6261 ei->reserved_extents = 0;
6262
6263 ei->ordered_data_close = 0;
6264 ei->orphan_meta_reserved = 0;
6265 ei->dummy_inode = 0;
6266 ei->force_compress = 0;
6267
6268 inode = &ei->vfs_inode;
6269 extent_map_tree_init(&ei->extent_tree, GFP_NOFS);
6270 extent_io_tree_init(&ei->io_tree, &inode->i_data, GFP_NOFS);
6271 extent_io_tree_init(&ei->io_failure_tree, &inode->i_data, GFP_NOFS);
6272 mutex_init(&ei->log_mutex);
5277 btrfs_ordered_inode_tree_init(&ei->ordered_tree); 6273 btrfs_ordered_inode_tree_init(&ei->ordered_tree);
5278 INIT_LIST_HEAD(&ei->i_orphan); 6274 INIT_LIST_HEAD(&ei->i_orphan);
6275 INIT_LIST_HEAD(&ei->delalloc_inodes);
5279 INIT_LIST_HEAD(&ei->ordered_operations); 6276 INIT_LIST_HEAD(&ei->ordered_operations);
5280 return &ei->vfs_inode; 6277 RB_CLEAR_NODE(&ei->rb_node);
6278
6279 return inode;
5281} 6280}
5282 6281
5283void btrfs_destroy_inode(struct inode *inode) 6282void btrfs_destroy_inode(struct inode *inode)
@@ -5287,6 +6286,8 @@ void btrfs_destroy_inode(struct inode *inode)
5287 6286
5288 WARN_ON(!list_empty(&inode->i_dentry)); 6287 WARN_ON(!list_empty(&inode->i_dentry));
5289 WARN_ON(inode->i_data.nrpages); 6288 WARN_ON(inode->i_data.nrpages);
6289 WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents));
6290 WARN_ON(BTRFS_I(inode)->reserved_extents);
5290 6291
5291 /* 6292 /*
5292 * This can happen where we create an inode, but somebody else also 6293 * This can happen where we create an inode, but somebody else also
@@ -5307,13 +6308,13 @@ void btrfs_destroy_inode(struct inode *inode)
5307 spin_unlock(&root->fs_info->ordered_extent_lock); 6308 spin_unlock(&root->fs_info->ordered_extent_lock);
5308 } 6309 }
5309 6310
5310 spin_lock(&root->list_lock); 6311 spin_lock(&root->orphan_lock);
5311 if (!list_empty(&BTRFS_I(inode)->i_orphan)) { 6312 if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
5312 printk(KERN_INFO "BTRFS: inode %lu still on the orphan list\n", 6313 printk(KERN_INFO "BTRFS: inode %lu still on the orphan list\n",
5313 inode->i_ino); 6314 inode->i_ino);
5314 list_del_init(&BTRFS_I(inode)->i_orphan); 6315 list_del_init(&BTRFS_I(inode)->i_orphan);
5315 } 6316 }
5316 spin_unlock(&root->list_lock); 6317 spin_unlock(&root->orphan_lock);
5317 6318
5318 while (1) { 6319 while (1) {
5319 ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1); 6320 ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1);
@@ -5335,13 +6336,14 @@ free:
5335 kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); 6336 kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
5336} 6337}
5337 6338
5338void btrfs_drop_inode(struct inode *inode) 6339int btrfs_drop_inode(struct inode *inode)
5339{ 6340{
5340 struct btrfs_root *root = BTRFS_I(inode)->root; 6341 struct btrfs_root *root = BTRFS_I(inode)->root;
5341 if (inode->i_nlink > 0 && btrfs_root_refs(&root->root_item) == 0) 6342
5342 generic_delete_inode(inode); 6343 if (btrfs_root_refs(&root->root_item) == 0)
6344 return 1;
5343 else 6345 else
5344 generic_drop_inode(inode); 6346 return generic_drop_inode(inode);
5345} 6347}
5346 6348
5347static void init_once(void *foo) 6349static void init_once(void *foo)
@@ -5434,19 +6436,6 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
5434 if (S_ISDIR(old_inode->i_mode) && new_inode && 6436 if (S_ISDIR(old_inode->i_mode) && new_inode &&
5435 new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) 6437 new_inode->i_size > BTRFS_EMPTY_DIR_SIZE)
5436 return -ENOTEMPTY; 6438 return -ENOTEMPTY;
5437
5438 /*
5439 * We want to reserve the absolute worst case amount of items. So if
5440 * both inodes are subvols and we need to unlink them then that would
5441 * require 4 item modifications, but if they are both normal inodes it
5442 * would require 5 item modifications, so we'll assume their normal
5443 * inodes. So 5 * 2 is 10, plus 1 for the new link, so 11 total items
5444 * should cover the worst case number of items we'll modify.
5445 */
5446 ret = btrfs_reserve_metadata_space(root, 11);
5447 if (ret)
5448 return ret;
5449
5450 /* 6439 /*
5451 * we're using rename to replace one file with another. 6440 * we're using rename to replace one file with another.
5452 * and the replacement file is large. Start IO on it now so 6441 * and the replacement file is large. Start IO on it now so
@@ -5459,8 +6448,18 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
5459 /* close the racy window with snapshot create/destroy ioctl */ 6448 /* close the racy window with snapshot create/destroy ioctl */
5460 if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) 6449 if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)
5461 down_read(&root->fs_info->subvol_sem); 6450 down_read(&root->fs_info->subvol_sem);
6451 /*
6452 * We want to reserve the absolute worst case amount of items. So if
6453 * both inodes are subvols and we need to unlink them then that would
6454 * require 4 item modifications, but if they are both normal inodes it
6455 * would require 5 item modifications, so we'll assume their normal
6456 * inodes. So 5 * 2 is 10, plus 1 for the new link, so 11 total items
6457 * should cover the worst case number of items we'll modify.
6458 */
6459 trans = btrfs_start_transaction(root, 20);
6460 if (IS_ERR(trans))
6461 return PTR_ERR(trans);
5462 6462
5463 trans = btrfs_start_transaction(root, 1);
5464 btrfs_set_trans_block_group(trans, new_dir); 6463 btrfs_set_trans_block_group(trans, new_dir);
5465 6464
5466 if (dest != root) 6465 if (dest != root)
@@ -5559,7 +6558,6 @@ out_fail:
5559 if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) 6558 if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)
5560 up_read(&root->fs_info->subvol_sem); 6559 up_read(&root->fs_info->subvol_sem);
5561 6560
5562 btrfs_unreserve_metadata_space(root, 11);
5563 return ret; 6561 return ret;
5564} 6562}
5565 6563
@@ -5611,6 +6609,38 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
5611 return 0; 6609 return 0;
5612} 6610}
5613 6611
6612int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput)
6613{
6614 struct btrfs_inode *binode;
6615 struct inode *inode = NULL;
6616
6617 spin_lock(&root->fs_info->delalloc_lock);
6618 while (!list_empty(&root->fs_info->delalloc_inodes)) {
6619 binode = list_entry(root->fs_info->delalloc_inodes.next,
6620 struct btrfs_inode, delalloc_inodes);
6621 inode = igrab(&binode->vfs_inode);
6622 if (inode) {
6623 list_move_tail(&binode->delalloc_inodes,
6624 &root->fs_info->delalloc_inodes);
6625 break;
6626 }
6627
6628 list_del_init(&binode->delalloc_inodes);
6629 cond_resched_lock(&root->fs_info->delalloc_lock);
6630 }
6631 spin_unlock(&root->fs_info->delalloc_lock);
6632
6633 if (inode) {
6634 write_inode_now(inode, 0);
6635 if (delay_iput)
6636 btrfs_add_delayed_iput(inode);
6637 else
6638 iput(inode);
6639 return 1;
6640 }
6641 return 0;
6642}
6643
5614static int btrfs_symlink(struct inode *dir, struct dentry *dentry, 6644static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
5615 const char *symname) 6645 const char *symname)
5616{ 6646{
@@ -5634,26 +6664,20 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
5634 if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root)) 6664 if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root))
5635 return -ENAMETOOLONG; 6665 return -ENAMETOOLONG;
5636 6666
6667 err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid);
6668 if (err)
6669 return err;
5637 /* 6670 /*
5638 * 2 items for inode item and ref 6671 * 2 items for inode item and ref
5639 * 2 items for dir items 6672 * 2 items for dir items
5640 * 1 item for xattr if selinux is on 6673 * 1 item for xattr if selinux is on
5641 */ 6674 */
5642 err = btrfs_reserve_metadata_space(root, 5); 6675 trans = btrfs_start_transaction(root, 5);
5643 if (err) 6676 if (IS_ERR(trans))
5644 return err; 6677 return PTR_ERR(trans);
5645 6678
5646 trans = btrfs_start_transaction(root, 1);
5647 if (!trans)
5648 goto out_fail;
5649 btrfs_set_trans_block_group(trans, dir); 6679 btrfs_set_trans_block_group(trans, dir);
5650 6680
5651 err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
5652 if (err) {
5653 err = -ENOSPC;
5654 goto out_unlock;
5655 }
5656
5657 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, 6681 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
5658 dentry->d_name.len, 6682 dentry->d_name.len,
5659 dentry->d_parent->d_inode->i_ino, objectid, 6683 dentry->d_parent->d_inode->i_ino, objectid,
@@ -5725,8 +6749,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
5725out_unlock: 6749out_unlock:
5726 nr = trans->blocks_used; 6750 nr = trans->blocks_used;
5727 btrfs_end_transaction_throttle(trans, root); 6751 btrfs_end_transaction_throttle(trans, root);
5728out_fail:
5729 btrfs_unreserve_metadata_space(root, 5);
5730 if (drop_inode) { 6752 if (drop_inode) {
5731 inode_dec_link_count(inode); 6753 inode_dec_link_count(inode);
5732 iput(inode); 6754 iput(inode);
@@ -5735,33 +6757,28 @@ out_fail:
5735 return err; 6757 return err;
5736} 6758}
5737 6759
5738static int prealloc_file_range(struct inode *inode, u64 start, u64 end, 6760int btrfs_prealloc_file_range(struct inode *inode, int mode,
5739 u64 alloc_hint, int mode, loff_t actual_len) 6761 u64 start, u64 num_bytes, u64 min_size,
6762 loff_t actual_len, u64 *alloc_hint)
5740{ 6763{
5741 struct btrfs_trans_handle *trans; 6764 struct btrfs_trans_handle *trans;
5742 struct btrfs_root *root = BTRFS_I(inode)->root; 6765 struct btrfs_root *root = BTRFS_I(inode)->root;
5743 struct btrfs_key ins; 6766 struct btrfs_key ins;
5744 u64 cur_offset = start; 6767 u64 cur_offset = start;
5745 u64 num_bytes = end - start;
5746 int ret = 0; 6768 int ret = 0;
5747 u64 i_size;
5748 6769
5749 while (num_bytes > 0) { 6770 while (num_bytes > 0) {
5750 trans = btrfs_start_transaction(root, 1); 6771 trans = btrfs_start_transaction(root, 3);
5751 6772 if (IS_ERR(trans)) {
5752 ret = btrfs_reserve_extent(trans, root, num_bytes, 6773 ret = PTR_ERR(trans);
5753 root->sectorsize, 0, alloc_hint, 6774 break;
5754 (u64)-1, &ins, 1);
5755 if (ret) {
5756 WARN_ON(1);
5757 goto stop_trans;
5758 } 6775 }
5759 6776
5760 ret = btrfs_reserve_metadata_space(root, 3); 6777 ret = btrfs_reserve_extent(trans, root, num_bytes, min_size,
6778 0, *alloc_hint, (u64)-1, &ins, 1);
5761 if (ret) { 6779 if (ret) {
5762 btrfs_free_reserved_extent(root, ins.objectid, 6780 btrfs_end_transaction(trans, root);
5763 ins.offset); 6781 break;
5764 goto stop_trans;
5765 } 6782 }
5766 6783
5767 ret = insert_reserved_file_extent(trans, inode, 6784 ret = insert_reserved_file_extent(trans, inode,
@@ -5775,34 +6792,27 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end,
5775 6792
5776 num_bytes -= ins.offset; 6793 num_bytes -= ins.offset;
5777 cur_offset += ins.offset; 6794 cur_offset += ins.offset;
5778 alloc_hint = ins.objectid + ins.offset; 6795 *alloc_hint = ins.objectid + ins.offset;
5779 6796
5780 inode->i_ctime = CURRENT_TIME; 6797 inode->i_ctime = CURRENT_TIME;
5781 BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC; 6798 BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC;
5782 if (!(mode & FALLOC_FL_KEEP_SIZE) && 6799 if (!(mode & FALLOC_FL_KEEP_SIZE) &&
5783 (actual_len > inode->i_size) && 6800 (actual_len > inode->i_size) &&
5784 (cur_offset > inode->i_size)) { 6801 (cur_offset > inode->i_size)) {
5785
5786 if (cur_offset > actual_len) 6802 if (cur_offset > actual_len)
5787 i_size = actual_len; 6803 i_size_write(inode, actual_len);
5788 else 6804 else
5789 i_size = cur_offset; 6805 i_size_write(inode, cur_offset);
5790 i_size_write(inode, i_size); 6806 i_size_write(inode, cur_offset);
5791 btrfs_ordered_update_i_size(inode, i_size, NULL); 6807 btrfs_ordered_update_i_size(inode, cur_offset, NULL);
5792 } 6808 }
5793 6809
5794 ret = btrfs_update_inode(trans, root, inode); 6810 ret = btrfs_update_inode(trans, root, inode);
5795 BUG_ON(ret); 6811 BUG_ON(ret);
5796 6812
5797 btrfs_end_transaction(trans, root); 6813 btrfs_end_transaction(trans, root);
5798 btrfs_unreserve_metadata_space(root, 3);
5799 } 6814 }
5800 return ret; 6815 return ret;
5801
5802stop_trans:
5803 btrfs_end_transaction(trans, root);
5804 return ret;
5805
5806} 6816}
5807 6817
5808static long btrfs_fallocate(struct inode *inode, int mode, 6818static long btrfs_fallocate(struct inode *inode, int mode,
@@ -5835,8 +6845,7 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5835 goto out; 6845 goto out;
5836 } 6846 }
5837 6847
5838 ret = btrfs_check_data_free_space(BTRFS_I(inode)->root, inode, 6848 ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start);
5839 alloc_end - alloc_start);
5840 if (ret) 6849 if (ret)
5841 goto out; 6850 goto out;
5842 6851
@@ -5881,16 +6890,16 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5881 if (em->block_start == EXTENT_MAP_HOLE || 6890 if (em->block_start == EXTENT_MAP_HOLE ||
5882 (cur_offset >= inode->i_size && 6891 (cur_offset >= inode->i_size &&
5883 !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { 6892 !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
5884 ret = prealloc_file_range(inode, 6893 ret = btrfs_prealloc_file_range(inode, mode, cur_offset,
5885 cur_offset, last_byte, 6894 last_byte - cur_offset,
5886 alloc_hint, mode, offset+len); 6895 1 << inode->i_blkbits,
6896 offset + len,
6897 &alloc_hint);
5887 if (ret < 0) { 6898 if (ret < 0) {
5888 free_extent_map(em); 6899 free_extent_map(em);
5889 break; 6900 break;
5890 } 6901 }
5891 } 6902 }
5892 if (em->block_start <= EXTENT_MAP_LAST_BYTE)
5893 alloc_hint = em->block_start;
5894 free_extent_map(em); 6903 free_extent_map(em);
5895 6904
5896 cur_offset = last_byte; 6905 cur_offset = last_byte;
@@ -5902,8 +6911,7 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5902 unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, 6911 unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
5903 &cached_state, GFP_NOFS); 6912 &cached_state, GFP_NOFS);
5904 6913
5905 btrfs_free_reserved_data_space(BTRFS_I(inode)->root, inode, 6914 btrfs_free_reserved_data_space(inode, alloc_end - alloc_start);
5906 alloc_end - alloc_start);
5907out: 6915out:
5908 mutex_unlock(&inode->i_mutex); 6916 mutex_unlock(&inode->i_mutex);
5909 return ret; 6917 return ret;