path: root/fs/btrfs/inode.c
Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r--	fs/btrfs/inode.c	1780
1 file changed, 1366 insertions(+), 414 deletions(-)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 02bb099845fd..1bff92ad4744 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -36,6 +36,7 @@
 #include <linux/xattr.h>
 #include <linux/posix_acl.h>
 #include <linux/falloc.h>
+#include <linux/slab.h>
 #include "compat.h"
 #include "ctree.h"
 #include "disk-io.h"
@@ -251,6 +252,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
 				   inline_len, compressed_size,
 				   compressed_pages);
 	BUG_ON(ret);
+	btrfs_delalloc_release_metadata(inode, end + 1 - start);
 	btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0);
 	return 0;
 }
@@ -413,6 +415,7 @@ again:
 	trans = btrfs_join_transaction(root, 1);
 	BUG_ON(!trans);
 	btrfs_set_trans_block_group(trans, inode);
+	trans->block_rsv = &root->fs_info->delalloc_block_rsv;
 
 	/* lets try to make an inline extent */
 	if (ret || total_in < (actual_end - start)) {
@@ -438,7 +441,6 @@ again:
 				     start, end, NULL,
 				     EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY |
 				     EXTENT_CLEAR_DELALLOC |
-				     EXTENT_CLEAR_ACCOUNTING |
 				     EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK);
 
 		btrfs_end_transaction(trans, root);
@@ -696,6 +698,38 @@ retry:
 	return 0;
 }
 
+static u64 get_extent_allocation_hint(struct inode *inode, u64 start,
+				      u64 num_bytes)
+{
+	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+	struct extent_map *em;
+	u64 alloc_hint = 0;
+
+	read_lock(&em_tree->lock);
+	em = search_extent_mapping(em_tree, start, num_bytes);
+	if (em) {
+		/*
+		 * if block start isn't an actual block number then find the
+		 * first block in this inode and use that as a hint. If that
+		 * block is also bogus then just don't worry about it.
+		 */
+		if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
+			free_extent_map(em);
+			em = search_extent_mapping(em_tree, 0, 0);
+			if (em && em->block_start < EXTENT_MAP_LAST_BYTE)
+				alloc_hint = em->block_start;
+			if (em)
+				free_extent_map(em);
+		} else {
+			alloc_hint = em->block_start;
+			free_extent_map(em);
+		}
+	}
+	read_unlock(&em_tree->lock);
+
+	return alloc_hint;
+}
+
 /*
  * when extent_io.c finds a delayed allocation range in the file,
  * the call backs end up in this code. The basic idea is to
@@ -733,6 +767,7 @@ static noinline int cow_file_range(struct inode *inode,
 	trans = btrfs_join_transaction(root, 1);
 	BUG_ON(!trans);
 	btrfs_set_trans_block_group(trans, inode);
+	trans->block_rsv = &root->fs_info->delalloc_block_rsv;
 
 	actual_end = min_t(u64, isize, end + 1);
 
@@ -752,7 +787,6 @@ static noinline int cow_file_range(struct inode *inode,
 				     EXTENT_CLEAR_UNLOCK_PAGE |
 				     EXTENT_CLEAR_UNLOCK |
 				     EXTENT_CLEAR_DELALLOC |
-				     EXTENT_CLEAR_ACCOUNTING |
 				     EXTENT_CLEAR_DIRTY |
 				     EXTENT_SET_WRITEBACK |
 				     EXTENT_END_WRITEBACK);
@@ -768,35 +802,13 @@ static noinline int cow_file_range(struct inode *inode,
 	BUG_ON(disk_num_bytes >
 	       btrfs_super_total_bytes(&root->fs_info->super_copy));
 
-
-	read_lock(&BTRFS_I(inode)->extent_tree.lock);
-	em = search_extent_mapping(&BTRFS_I(inode)->extent_tree,
-				   start, num_bytes);
-	if (em) {
-		/*
-		 * if block start isn't an actual block number then find the
-		 * first block in this inode and use that as a hint. If that
-		 * block is also bogus then just don't worry about it.
-		 */
-		if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
-			free_extent_map(em);
-			em = search_extent_mapping(em_tree, 0, 0);
-			if (em && em->block_start < EXTENT_MAP_LAST_BYTE)
-				alloc_hint = em->block_start;
-			if (em)
-				free_extent_map(em);
-		} else {
-			alloc_hint = em->block_start;
-			free_extent_map(em);
-		}
-	}
-	read_unlock(&BTRFS_I(inode)->extent_tree.lock);
+	alloc_hint = get_extent_allocation_hint(inode, start, num_bytes);
 	btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0);
 
 	while (disk_num_bytes > 0) {
 		unsigned long op;
 
-		cur_alloc_size = min(disk_num_bytes, root->fs_info->max_extent);
+		cur_alloc_size = disk_num_bytes;
 		ret = btrfs_reserve_extent(trans, root, cur_alloc_size,
 					   root->sectorsize, 0, alloc_hint,
 					   (u64)-1, &ins, 1);
@@ -1173,6 +1185,13 @@ out_check:
 					       num_bytes, num_bytes, type);
 		BUG_ON(ret);
 
+		if (root->root_key.objectid ==
+		    BTRFS_DATA_RELOC_TREE_OBJECTID) {
+			ret = btrfs_reloc_clone_csums(inode, cur_offset,
+						      num_bytes);
+			BUG_ON(ret);
+		}
+
 		extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
 				cur_offset, cur_offset + num_bytes - 1,
 				locked_page, EXTENT_CLEAR_UNLOCK_PAGE |
@@ -1225,36 +1244,13 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page,
 }
 
 static int btrfs_split_extent_hook(struct inode *inode,
 				   struct extent_state *orig, u64 split)
 {
-	struct btrfs_root *root = BTRFS_I(inode)->root;
-	u64 size;
-
+	/* not delalloc, ignore it */
 	if (!(orig->state & EXTENT_DELALLOC))
 		return 0;
 
-	size = orig->end - orig->start + 1;
-	if (size > root->fs_info->max_extent) {
-		u64 num_extents;
-		u64 new_size;
-
-		new_size = orig->end - split + 1;
-		num_extents = div64_u64(size + root->fs_info->max_extent - 1,
-					root->fs_info->max_extent);
-
-		/*
-		 * if we break a large extent up then leave oustanding_extents
-		 * be, since we've already accounted for the large extent.
-		 */
-		if (div64_u64(new_size + root->fs_info->max_extent - 1,
-			      root->fs_info->max_extent) < num_extents)
-			return 0;
-	}
-
-	spin_lock(&BTRFS_I(inode)->accounting_lock);
-	BTRFS_I(inode)->outstanding_extents++;
-	spin_unlock(&BTRFS_I(inode)->accounting_lock);
-
+	atomic_inc(&BTRFS_I(inode)->outstanding_extents);
 	return 0;
 }
 
@@ -1268,42 +1264,11 @@ static int btrfs_merge_extent_hook(struct inode *inode,
 				   struct extent_state *new,
 				   struct extent_state *other)
 {
-	struct btrfs_root *root = BTRFS_I(inode)->root;
-	u64 new_size, old_size;
-	u64 num_extents;
-
 	/* not delalloc, ignore it */
 	if (!(other->state & EXTENT_DELALLOC))
 		return 0;
 
-	old_size = other->end - other->start + 1;
-	if (new->start < other->start)
-		new_size = other->end - new->start + 1;
-	else
-		new_size = new->end - other->start + 1;
-
-	/* we're not bigger than the max, unreserve the space and go */
-	if (new_size <= root->fs_info->max_extent) {
-		spin_lock(&BTRFS_I(inode)->accounting_lock);
-		BTRFS_I(inode)->outstanding_extents--;
-		spin_unlock(&BTRFS_I(inode)->accounting_lock);
-		return 0;
-	}
-
-	/*
-	 * If we grew by another max_extent, just return, we want to keep that
-	 * reserved amount.
-	 */
-	num_extents = div64_u64(old_size + root->fs_info->max_extent - 1,
-				root->fs_info->max_extent);
-	if (div64_u64(new_size + root->fs_info->max_extent - 1,
-		      root->fs_info->max_extent) > num_extents)
-		return 0;
-
-	spin_lock(&BTRFS_I(inode)->accounting_lock);
-	BTRFS_I(inode)->outstanding_extents--;
-	spin_unlock(&BTRFS_I(inode)->accounting_lock);
-
+	atomic_dec(&BTRFS_I(inode)->outstanding_extents);
 	return 0;
 }
 
@@ -1312,8 +1277,8 @@ static int btrfs_merge_extent_hook(struct inode *inode,
  * bytes in this file, and to maintain the list of inodes that
  * have pending delalloc work to be done.
  */
-static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end,
-		       unsigned long old, unsigned long bits)
+static int btrfs_set_bit_hook(struct inode *inode,
+			      struct extent_state *state, int *bits)
 {
 
 	/*
@@ -1321,16 +1286,18 @@ static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end,
 	 * but in this case, we are only testeing for the DELALLOC
 	 * bit, which is only set or cleared with irqs on
 	 */
-	if (!(old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) {
+	if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
 		struct btrfs_root *root = BTRFS_I(inode)->root;
+		u64 len = state->end + 1 - state->start;
+
+		if (*bits & EXTENT_FIRST_DELALLOC)
+			*bits &= ~EXTENT_FIRST_DELALLOC;
+		else
+			atomic_inc(&BTRFS_I(inode)->outstanding_extents);
 
-		spin_lock(&BTRFS_I(inode)->accounting_lock);
-		BTRFS_I(inode)->outstanding_extents++;
-		spin_unlock(&BTRFS_I(inode)->accounting_lock);
-		btrfs_delalloc_reserve_space(root, inode, end - start + 1);
 		spin_lock(&root->fs_info->delalloc_lock);
-		BTRFS_I(inode)->delalloc_bytes += end - start + 1;
-		root->fs_info->delalloc_bytes += end - start + 1;
+		BTRFS_I(inode)->delalloc_bytes += len;
+		root->fs_info->delalloc_bytes += len;
 		if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
 			list_add_tail(&BTRFS_I(inode)->delalloc_inodes,
 				      &root->fs_info->delalloc_inodes);
@@ -1344,44 +1311,32 @@ static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end,
  * extent_io.c clear_bit_hook, see set_bit_hook for why
  */
 static int btrfs_clear_bit_hook(struct inode *inode,
-				struct extent_state *state, unsigned long bits)
+				struct extent_state *state, int *bits)
 {
 	/*
 	 * set_bit and clear bit hooks normally require _irqsave/restore
 	 * but in this case, we are only testeing for the DELALLOC
 	 * bit, which is only set or cleared with irqs on
 	 */
-	if ((state->state & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) {
+	if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
 		struct btrfs_root *root = BTRFS_I(inode)->root;
+		u64 len = state->end + 1 - state->start;
 
-		if (bits & EXTENT_DO_ACCOUNTING) {
-			spin_lock(&BTRFS_I(inode)->accounting_lock);
-			BTRFS_I(inode)->outstanding_extents--;
-			spin_unlock(&BTRFS_I(inode)->accounting_lock);
-			btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
-		}
+		if (*bits & EXTENT_FIRST_DELALLOC)
+			*bits &= ~EXTENT_FIRST_DELALLOC;
+		else if (!(*bits & EXTENT_DO_ACCOUNTING))
+			atomic_dec(&BTRFS_I(inode)->outstanding_extents);
+
+		if (*bits & EXTENT_DO_ACCOUNTING)
+			btrfs_delalloc_release_metadata(inode, len);
+
+		if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID)
+			btrfs_free_reserved_data_space(inode, len);
 
 		spin_lock(&root->fs_info->delalloc_lock);
-		if (state->end - state->start + 1 >
-		    root->fs_info->delalloc_bytes) {
-			printk(KERN_INFO "btrfs warning: delalloc account "
-			       "%llu %llu\n",
-			       (unsigned long long)
-			       state->end - state->start + 1,
-			       (unsigned long long)
-			       root->fs_info->delalloc_bytes);
-			btrfs_delalloc_free_space(root, inode, (u64)-1);
-			root->fs_info->delalloc_bytes = 0;
-			BTRFS_I(inode)->delalloc_bytes = 0;
-		} else {
-			btrfs_delalloc_free_space(root, inode,
-						  state->end -
-						  state->start + 1);
-			root->fs_info->delalloc_bytes -= state->end -
-				state->start + 1;
-			BTRFS_I(inode)->delalloc_bytes -= state->end -
-				state->start + 1;
-		}
+		root->fs_info->delalloc_bytes -= len;
+		BTRFS_I(inode)->delalloc_bytes -= len;
+
 		if (BTRFS_I(inode)->delalloc_bytes == 0 &&
 		    !list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
 			list_del_init(&BTRFS_I(inode)->delalloc_inodes);
@@ -1430,7 +1385,8 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
  */
 static int __btrfs_submit_bio_start(struct inode *inode, int rw,
 				    struct bio *bio, int mirror_num,
-				    unsigned long bio_flags)
+				    unsigned long bio_flags,
+				    u64 bio_offset)
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	int ret = 0;
@@ -1449,7 +1405,8 @@ static int __btrfs_submit_bio_start(struct inode *inode, int rw,
  * are inserted into the btree
  */
 static int __btrfs_submit_bio_done(struct inode *inode, int rw, struct bio *bio,
-			  int mirror_num, unsigned long bio_flags)
+			  int mirror_num, unsigned long bio_flags,
+			  u64 bio_offset)
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	return btrfs_map_bio(root, rw, bio, mirror_num, 1);
@@ -1460,7 +1417,8 @@ static int __btrfs_submit_bio_done(struct inode *inode, int rw, struct bio *bio,
  * on write, or reading the csums from the tree before a read
  */
 static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
-			  int mirror_num, unsigned long bio_flags)
+			  int mirror_num, unsigned long bio_flags,
+			  u64 bio_offset)
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	int ret = 0;
@@ -1485,7 +1443,8 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
 		/* we're doing a write, do the async checksumming */
 		return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
 				   inode, rw, bio, mirror_num,
-				   bio_flags, __btrfs_submit_bio_start,
+				   bio_flags, bio_offset,
+				   __btrfs_submit_bio_start,
 				   __btrfs_submit_bio_done);
 	}
 
@@ -1566,6 +1525,7 @@ again:
 		goto again;
 	}
 
+	BUG();
 	btrfs_set_extent_delalloc(inode, page_start, page_end, &cached_state);
 	ClearPageChecked(page);
 out:
@@ -1696,7 +1656,7 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
 static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
-	struct btrfs_trans_handle *trans;
+	struct btrfs_trans_handle *trans = NULL;
 	struct btrfs_ordered_extent *ordered_extent = NULL;
 	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
 	struct extent_state *cached_state = NULL;
@@ -1714,9 +1674,10 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
 		ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
 		if (!ret) {
 			trans = btrfs_join_transaction(root, 1);
+			btrfs_set_trans_block_group(trans, inode);
+			trans->block_rsv = &root->fs_info->delalloc_block_rsv;
 			ret = btrfs_update_inode(trans, root, inode);
 			BUG_ON(ret);
-			btrfs_end_transaction(trans, root);
 		}
 		goto out;
 	}
@@ -1726,6 +1687,8 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
 			 0, &cached_state, GFP_NOFS);
 
 	trans = btrfs_join_transaction(root, 1);
+	btrfs_set_trans_block_group(trans, inode);
+	trans->block_rsv = &root->fs_info->delalloc_block_rsv;
 
 	if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags))
 		compressed = 1;
@@ -1757,12 +1720,13 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
 	add_pending_csums(trans, inode, ordered_extent->file_offset,
 			  &ordered_extent->list);
 
-	/* this also removes the ordered extent from the tree */
 	btrfs_ordered_update_i_size(inode, 0, ordered_extent);
 	ret = btrfs_update_inode(trans, root, inode);
 	BUG_ON(ret);
-	btrfs_end_transaction(trans, root);
 out:
+	btrfs_delalloc_release_metadata(inode, ordered_extent->len);
+	if (trans)
+		btrfs_end_transaction(trans, root);
 	/* once for us */
 	btrfs_put_ordered_extent(ordered_extent);
 	/* once for the tree */
@@ -1884,7 +1848,7 @@ static int btrfs_io_failed_hook(struct bio *failed_bio,
 
 	BTRFS_I(inode)->io_tree.ops->submit_bio_hook(inode, rw, bio,
 						failrec->last_mirror,
-						failrec->bio_flags);
+						failrec->bio_flags, 0);
 	return 0;
 }
 
@@ -2039,32 +2003,196 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root)
 }
 
 /*
+ * calculate extra metadata reservation when snapshotting a subvolume
+ * contains orphan files.
+ */
+void btrfs_orphan_pre_snapshot(struct btrfs_trans_handle *trans,
+				struct btrfs_pending_snapshot *pending,
+				u64 *bytes_to_reserve)
+{
+	struct btrfs_root *root;
+	struct btrfs_block_rsv *block_rsv;
+	u64 num_bytes;
+	int index;
+
+	root = pending->root;
+	if (!root->orphan_block_rsv || list_empty(&root->orphan_list))
+		return;
+
+	block_rsv = root->orphan_block_rsv;
+
+	/* orphan block reservation for the snapshot */
+	num_bytes = block_rsv->size;
+
+	/*
+	 * after the snapshot is created, COWing tree blocks may use more
+	 * space than it frees. So we should make sure there is enough
+	 * reserved space.
+	 */
+	index = trans->transid & 0x1;
+	if (block_rsv->reserved + block_rsv->freed[index] < block_rsv->size) {
+		num_bytes += block_rsv->size -
+			     (block_rsv->reserved + block_rsv->freed[index]);
+	}
+
+	*bytes_to_reserve += num_bytes;
+}
+
+void btrfs_orphan_post_snapshot(struct btrfs_trans_handle *trans,
+				struct btrfs_pending_snapshot *pending)
+{
+	struct btrfs_root *root = pending->root;
+	struct btrfs_root *snap = pending->snap;
+	struct btrfs_block_rsv *block_rsv;
+	u64 num_bytes;
+	int index;
+	int ret;
+
+	if (!root->orphan_block_rsv || list_empty(&root->orphan_list))
+		return;
+
+	/* refill source subvolume's orphan block reservation */
+	block_rsv = root->orphan_block_rsv;
+	index = trans->transid & 0x1;
+	if (block_rsv->reserved + block_rsv->freed[index] < block_rsv->size) {
+		num_bytes = block_rsv->size -
+			    (block_rsv->reserved + block_rsv->freed[index]);
+		ret = btrfs_block_rsv_migrate(&pending->block_rsv,
+					      root->orphan_block_rsv,
+					      num_bytes);
+		BUG_ON(ret);
+	}
+
+	/* setup orphan block reservation for the snapshot */
+	block_rsv = btrfs_alloc_block_rsv(snap);
+	BUG_ON(!block_rsv);
+
+	btrfs_add_durable_block_rsv(root->fs_info, block_rsv);
+	snap->orphan_block_rsv = block_rsv;
+
+	num_bytes = root->orphan_block_rsv->size;
+	ret = btrfs_block_rsv_migrate(&pending->block_rsv,
+				      block_rsv, num_bytes);
+	BUG_ON(ret);
+
+#if 0
+	/* insert orphan item for the snapshot */
+	WARN_ON(!root->orphan_item_inserted);
+	ret = btrfs_insert_orphan_item(trans, root->fs_info->tree_root,
+				       snap->root_key.objectid);
+	BUG_ON(ret);
+	snap->orphan_item_inserted = 1;
+#endif
+}
+
+enum btrfs_orphan_cleanup_state {
+	ORPHAN_CLEANUP_STARTED	= 1,
+	ORPHAN_CLEANUP_DONE	= 2,
+};
+
+/*
+ * This is called in transaction commmit time. If there are no orphan
+ * files in the subvolume, it removes orphan item and frees block_rsv
+ * structure.
+ */
+void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans,
+			      struct btrfs_root *root)
+{
+	int ret;
+
+	if (!list_empty(&root->orphan_list) ||
+	    root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE)
+		return;
+
+	if (root->orphan_item_inserted &&
+	    btrfs_root_refs(&root->root_item) > 0) {
+		ret = btrfs_del_orphan_item(trans, root->fs_info->tree_root,
+					    root->root_key.objectid);
+		BUG_ON(ret);
+		root->orphan_item_inserted = 0;
+	}
+
+	if (root->orphan_block_rsv) {
+		WARN_ON(root->orphan_block_rsv->size > 0);
+		btrfs_free_block_rsv(root, root->orphan_block_rsv);
+		root->orphan_block_rsv = NULL;
+	}
+}
+
+/*
  * This creates an orphan entry for the given inode in case something goes
  * wrong in the middle of an unlink/truncate.
+ *
+ * NOTE: caller of this function should reserve 5 units of metadata for
+ * this function.
  */
 int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
-	int ret = 0;
+	struct btrfs_block_rsv *block_rsv = NULL;
+	int reserve = 0;
+	int insert = 0;
+	int ret;
 
-	spin_lock(&root->list_lock);
+	if (!root->orphan_block_rsv) {
+		block_rsv = btrfs_alloc_block_rsv(root);
+		BUG_ON(!block_rsv);
+	}
 
-	/* already on the orphan list, we're good */
-	if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
-		spin_unlock(&root->list_lock);
-		return 0;
+	spin_lock(&root->orphan_lock);
+	if (!root->orphan_block_rsv) {
+		root->orphan_block_rsv = block_rsv;
+	} else if (block_rsv) {
+		btrfs_free_block_rsv(root, block_rsv);
+		block_rsv = NULL;
 	}
 
-	list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list);
+	if (list_empty(&BTRFS_I(inode)->i_orphan)) {
+		list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list);
+#if 0
+		/*
+		 * For proper ENOSPC handling, we should do orphan
+		 * cleanup when mounting. But this introduces backward
+		 * compatibility issue.
+		 */
+		if (!xchg(&root->orphan_item_inserted, 1))
+			insert = 2;
+		else
+			insert = 1;
+#endif
+		insert = 1;
+	} else {
+		WARN_ON(!BTRFS_I(inode)->orphan_meta_reserved);
+	}
 
-	spin_unlock(&root->list_lock);
+	if (!BTRFS_I(inode)->orphan_meta_reserved) {
+		BTRFS_I(inode)->orphan_meta_reserved = 1;
+		reserve = 1;
+	}
+	spin_unlock(&root->orphan_lock);
 
-	/*
-	 * insert an orphan item to track this unlinked/truncated file
-	 */
-	ret = btrfs_insert_orphan_item(trans, root, inode->i_ino);
+	if (block_rsv)
+		btrfs_add_durable_block_rsv(root->fs_info, block_rsv);
 
-	return ret;
+	/* grab metadata reservation from transaction handle */
+	if (reserve) {
+		ret = btrfs_orphan_reserve_metadata(trans, inode);
+		BUG_ON(ret);
+	}
+
+	/* insert an orphan item to track this unlinked/truncated file */
+	if (insert >= 1) {
+		ret = btrfs_insert_orphan_item(trans, root, inode->i_ino);
+		BUG_ON(ret);
+	}
+
+	/* insert an orphan item to track subvolume contains orphan files */
+	if (insert >= 2) {
+		ret = btrfs_insert_orphan_item(trans, root->fs_info->tree_root,
+					       root->root_key.objectid);
+		BUG_ON(ret);
+	}
+	return 0;
 }
 
 /*
@@ -2074,26 +2202,31 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
 int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode)
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
+	int delete_item = 0;
+	int release_rsv = 0;
 	int ret = 0;
 
-	spin_lock(&root->list_lock);
-
-	if (list_empty(&BTRFS_I(inode)->i_orphan)) {
-		spin_unlock(&root->list_lock);
-		return 0;
+	spin_lock(&root->orphan_lock);
+	if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
+		list_del_init(&BTRFS_I(inode)->i_orphan);
+		delete_item = 1;
 	}
 
-	list_del_init(&BTRFS_I(inode)->i_orphan);
-	if (!trans) {
-		spin_unlock(&root->list_lock);
-		return 0;
+	if (BTRFS_I(inode)->orphan_meta_reserved) {
+		BTRFS_I(inode)->orphan_meta_reserved = 0;
+		release_rsv = 1;
 	}
+	spin_unlock(&root->orphan_lock);
 
-	spin_unlock(&root->list_lock);
+	if (trans && delete_item) {
+		ret = btrfs_del_orphan_item(trans, root, inode->i_ino);
+		BUG_ON(ret);
+	}
 
-	ret = btrfs_del_orphan_item(trans, root, inode->i_ino);
+	if (release_rsv)
+		btrfs_orphan_release_metadata(inode);
 
-	return ret;
+	return 0;
 }
 
 /*
@@ -2110,7 +2243,7 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
 	struct inode *inode;
 	int ret = 0, nr_unlink = 0, nr_truncate = 0;
 
-	if (!xchg(&root->clean_orphans, 0))
+	if (cmpxchg(&root->orphan_cleanup_state, 0, ORPHAN_CLEANUP_STARTED))
 		return;
 
 	path = btrfs_alloc_path();
@@ -2163,16 +2296,15 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
 		found_key.type = BTRFS_INODE_ITEM_KEY;
 		found_key.offset = 0;
 		inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL);
-		if (IS_ERR(inode))
-			break;
+		BUG_ON(IS_ERR(inode));
 
 		/*
 		 * add this inode to the orphan list so btrfs_orphan_del does
 		 * the proper thing when we hit it
 		 */
-		spin_lock(&root->list_lock);
+		spin_lock(&root->orphan_lock);
 		list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list);
-		spin_unlock(&root->list_lock);
+		spin_unlock(&root->orphan_lock);
 
 		/*
 		 * if this is a bad inode, means we actually succeeded in
@@ -2181,7 +2313,7 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
 		 * do a destroy_inode
 		 */
 		if (is_bad_inode(inode)) {
-			trans = btrfs_start_transaction(root, 1);
+			trans = btrfs_start_transaction(root, 0);
 			btrfs_orphan_del(trans, inode);
 			btrfs_end_transaction(trans, root);
 			iput(inode);
@@ -2199,13 +2331,23 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
 		/* this will do delete_inode and everything for us */
 		iput(inode);
 	}
+	btrfs_free_path(path);
+
+	root->orphan_cleanup_state = ORPHAN_CLEANUP_DONE;
+
+	if (root->orphan_block_rsv)
+		btrfs_block_rsv_release(root, root->orphan_block_rsv,
+					(u64)-1);
+
+	if (root->orphan_block_rsv || root->orphan_item_inserted) {
+		trans = btrfs_join_transaction(root, 1);
+		btrfs_end_transaction(trans, root);
+	}
 
 	if (nr_unlink)
 		printk(KERN_INFO "btrfs: unlinked %d orphans\n", nr_unlink);
 	if (nr_truncate)
 		printk(KERN_INFO "btrfs: truncated %d orphans\n", nr_truncate);
-
-	btrfs_free_path(path);
 }
 
 /*
@@ -2524,44 +2666,217 @@ out:
 	return ret;
 }
 
-static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
+/* helper to check if there is any shared block in the path */
+static int check_path_shared(struct btrfs_root *root,
+			     struct btrfs_path *path)
+{
+	struct extent_buffer *eb;
+	int level;
+	int ret;
+	u64 refs = 1;
+
+	for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
+		if (!path->nodes[level])
+			break;
+		eb = path->nodes[level];
+		if (!btrfs_block_can_be_shared(root, eb))
+			continue;
+		ret = btrfs_lookup_extent_info(NULL, root, eb->start, eb->len,
+					       &refs, NULL);
+		if (refs > 1)
+			return 1;
+	}
+	return 0;
+}
+
+/*
+ * helper to start transaction for unlink and rmdir.
+ *
+ * unlink and rmdir are special in btrfs, they do not always free space.
+ * so in enospc case, we should make sure they will free space before
+ * allowing them to use the global metadata reservation.
+ */
+static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
+						       struct dentry *dentry)
 {
-	struct btrfs_root *root;
 	struct btrfs_trans_handle *trans;
+	struct btrfs_root *root = BTRFS_I(dir)->root;
+	struct btrfs_path *path;
+	struct btrfs_inode_ref *ref;
+	struct btrfs_dir_item *di;
 	struct inode *inode = dentry->d_inode;
+	u64 index;
+	int check_link = 1;
+	int err = -ENOSPC;
 	int ret;
-	unsigned long nr = 0;
 
-	root = BTRFS_I(dir)->root;
+	trans = btrfs_start_transaction(root, 10);
+	if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC)
+		return trans;
 
-	/*
-	 * 5 items for unlink inode
-	 * 1 for orphan
-	 */
-	ret = btrfs_reserve_metadata_space(root, 6);
-	if (ret)
-		return ret;
-
-	trans = btrfs_start_transaction(root, 1);
+	if (inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
+		return ERR_PTR(-ENOSPC);
+
+	/* check if there is someone else holds reference */
+	if (S_ISDIR(inode->i_mode) && atomic_read(&inode->i_count) > 1)
+		return ERR_PTR(-ENOSPC);
+
+	if (atomic_read(&inode->i_count) > 2)
+		return ERR_PTR(-ENOSPC);
+
+	if (xchg(&root->fs_info->enospc_unlink, 1))
+		return ERR_PTR(-ENOSPC);
+
+	path = btrfs_alloc_path();
+	if (!path) {
+		root->fs_info->enospc_unlink = 0;
+		return ERR_PTR(-ENOMEM);
+	}
+
+	trans = btrfs_start_transaction(root, 0);
 	if (IS_ERR(trans)) {
-		btrfs_unreserve_metadata_space(root, 6);
-		return PTR_ERR(trans);
+		btrfs_free_path(path);
+		root->fs_info->enospc_unlink = 0;
+		return trans;
 	}
 
+	path->skip_locking = 1;
+	path->search_commit_root = 1;
+
+	ret = btrfs_lookup_inode(trans, root, path,
+				&BTRFS_I(dir)->location, 0);
+	if (ret < 0) {
+		err = ret;
+		goto out;
+	}
+	if (ret == 0) {
+		if (check_path_shared(root, path))
+			goto out;
+	} else {
+		check_link = 0;
+	}
+	btrfs_release_path(root, path);
+
+	ret = btrfs_lookup_inode(trans, root, path,
+				&BTRFS_I(inode)->location, 0);
+	if (ret < 0) {
+		err = ret;
+		goto out;
+	}
+	if (ret == 0) {
+		if (check_path_shared(root, path))
+			goto out;
+	} else {
+		check_link = 0;
+	}
+	btrfs_release_path(root, path);
+
+	if (ret == 0 && S_ISREG(inode->i_mode)) {
+		ret = btrfs_lookup_file_extent(trans, root, path,
+					       inode->i_ino, (u64)-1, 0);
+		if (ret < 0) {
+			err = ret;
+			goto out;
+		}
+		BUG_ON(ret == 0);
+		if (check_path_shared(root, path))
+			goto out;
+		btrfs_release_path(root, path);
+	}
+
+	if (!check_link) {
+		err = 0;
+		goto out;
+	}
+
+	di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino,
+				dentry->d_name.name, dentry->d_name.len, 0);
+	if (IS_ERR(di)) {
+		err = PTR_ERR(di);
+		goto out;
+	}
+	if (di) {
+		if (check_path_shared(root, path))
+			goto out;
+	} else {
+		err = 0;
+		goto out;
+	}
+	btrfs_release_path(root, path);
+
+	ref = btrfs_lookup_inode_ref(trans, root, path,
+				dentry->d_name.name, dentry->d_name.len,
+				inode->i_ino, dir->i_ino, 0);
+	if (IS_ERR(ref)) {
+		err = PTR_ERR(ref);
+		goto out;
+	}
+	BUG_ON(!ref);
+	if (check_path_shared(root, path))
+		goto out;
+	index = btrfs_inode_ref_index(path->nodes[0], ref);
+	btrfs_release_path(root, path);
+
+	di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, index,
+				dentry->d_name.name, dentry->d_name.len, 0);
+	if (IS_ERR(di)) {
+		err = PTR_ERR(di);
+		goto out;
+	}
+	BUG_ON(ret == -ENOENT);
+	if (check_path_shared(root, path))
+		goto out;
+
+	err = 0;
+out:
+	btrfs_free_path(path);
+	if (err) {
+		btrfs_end_transaction(trans, root);
+		root->fs_info->enospc_unlink = 0;
+		return ERR_PTR(err);
+	}
+
+	trans->block_rsv = &root->fs_info->global_block_rsv;
+	return trans;
+}
+
+static void __unlink_end_trans(struct btrfs_trans_handle *trans,
+			       struct btrfs_root *root)
+{
+	if (trans->block_rsv == &root->fs_info->global_block_rsv) {
+		BUG_ON(!root->fs_info->enospc_unlink);
+		root->fs_info->enospc_unlink = 0;
+	}
+	btrfs_end_transaction_throttle(trans, root);
+}
+
+static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
+{
+	struct btrfs_root *root = BTRFS_I(dir)->root;
+	struct btrfs_trans_handle *trans;
+	struct inode *inode = dentry->d_inode;
+	int ret;
+	unsigned long nr = 0;
+
+	trans = __unlink_start_trans(dir, dentry);
+	if (IS_ERR(trans))
+		return PTR_ERR(trans);
+
 	btrfs_set_trans_block_group(trans, dir);
 
 	btrfs_record_unlink_dir(trans, dir, dentry->d_inode, 0);
 
 	ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode,
 				 dentry->d_name.name, dentry->d_name.len);
+	BUG_ON(ret);
 
-	if (inode->i_nlink == 0)
+	if (inode->i_nlink == 0) {
 		ret = btrfs_orphan_add(trans, inode);
+		BUG_ON(ret);
+	}
 
 	nr = trans->blocks_used;
-
-	btrfs_end_transaction_throttle(trans, root);
-	btrfs_unreserve_metadata_space(root, 6);
+	__unlink_end_trans(trans, root);
 	btrfs_btree_balance_dirty(root, nr);
 	return ret;
 }
@@ -2633,7 +2948,6 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
 {
 	struct inode *inode = dentry->d_inode;
 	int err = 0;
-	int ret;
 	struct btrfs_root *root = BTRFS_I(dir)->root;
 	struct btrfs_trans_handle *trans;
 	unsigned long nr = 0;
@@ -2642,15 +2956,9 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
 	    inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)
 		return -ENOTEMPTY;
 
-	ret = btrfs_reserve_metadata_space(root, 5);
-	if (ret)
-		return ret;
-
-	trans = btrfs_start_transaction(root, 1);
-	if (IS_ERR(trans)) {
-		btrfs_unreserve_metadata_space(root, 5);
+	trans = __unlink_start_trans(dir, dentry);
+	if (IS_ERR(trans))
 		return PTR_ERR(trans);
-	}
 
 	btrfs_set_trans_block_group(trans, dir);
 
@@ -2673,12 +2981,9 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
 		btrfs_i_size_write(inode, 0);
 out:
 	nr = trans->blocks_used;
-	ret = btrfs_end_transaction_throttle(trans, root);
-	btrfs_unreserve_metadata_space(root, 5);
+	__unlink_end_trans(trans, root);
 	btrfs_btree_balance_dirty(root, nr);
 
-	if (ret && !err)
-		err = ret;
 	return err;
 }
 
@@ -3075,6 +3380,7 @@ out:
 	if (pending_del_nr) {
 		ret = btrfs_del_items(trans, root, path, pending_del_slot,
 				      pending_del_nr);
+		BUG_ON(ret);
 	}
 	btrfs_free_path(path);
 	return err;
@@ -3102,11 +3408,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
 
 	if ((offset & (blocksize - 1)) == 0)
 		goto out;
-	ret = btrfs_check_data_free_space(root, inode, PAGE_CACHE_SIZE);
-	if (ret)
-		goto out;
-
-	ret = btrfs_reserve_metadata_for_delalloc(root, inode, 1);
+	ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
 	if (ret)
 		goto out;
 
@@ -3114,8 +3416,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
 again:
 	page = grab_cache_page(mapping, index);
 	if (!page) {
-		btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE);
-		btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
+		btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
 		goto out;
 	}
 
@@ -3178,8 +3479,7 @@ again:
 
 out_unlock:
 	if (ret)
-		btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE);
-	btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
+		btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
 	unlock_page(page);
 	page_cache_release(page);
 out:
@@ -3191,7 +3491,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
 	struct btrfs_trans_handle *trans;
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
-	struct extent_map *em;
+	struct extent_map *em = NULL;
 	struct extent_state *cached_state = NULL;
 	u64 mask = root->sectorsize - 1;
 	u64 hole_start = (inode->i_size + mask) & ~mask;
@@ -3229,11 +3529,11 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
 			u64 hint_byte = 0;
 			hole_size = last_byte - cur_offset;
 
-			err = btrfs_reserve_metadata_space(root, 2);
-			if (err)
+			trans = btrfs_start_transaction(root, 2);
+			if (IS_ERR(trans)) {
+				err = PTR_ERR(trans);
 				break;
-
-			trans = btrfs_start_transaction(root, 1);
+			}
 			btrfs_set_trans_block_group(trans, inode);
 
 			err = btrfs_drop_extents(trans, inode, cur_offset,
@@ -3251,14 +3551,15 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
 						 last_byte - 1, 0);
 
 			btrfs_end_transaction(trans, root);
-			btrfs_unreserve_metadata_space(root, 2);
 		}
 		free_extent_map(em);
+		em = NULL;
 		cur_offset = last_byte;
 		if (cur_offset >= block_end)
 			break;
 	}
 
+	free_extent_map(em);
 	unlock_extent_cached(io_tree, hole_start, block_end - 1, &cached_state,
 			     GFP_NOFS);
 	return err;
@@ -3285,11 +3586,10 @@ static int btrfs_setattr_size(struct inode *inode, struct iattr *attr)
 		}
 	}
 
-	ret = btrfs_reserve_metadata_space(root, 1);
-	if (ret)
-		return ret;
+	trans = btrfs_start_transaction(root, 5);
+	if (IS_ERR(trans))
+		return PTR_ERR(trans);
 
-	trans = btrfs_start_transaction(root, 1);
 	btrfs_set_trans_block_group(trans, inode);
 
 	ret = btrfs_orphan_add(trans, inode);
@@ -3297,7 +3597,6 @@ static int btrfs_setattr_size(struct inode *inode, struct iattr *attr)
 
 	nr = trans->blocks_used;
 	btrfs_end_transaction(trans, root);
-	btrfs_unreserve_metadata_space(root, 1);
 	btrfs_btree_balance_dirty(root, nr);
 
 	if (attr->ia_size > inode->i_size) {
@@ -3310,8 +3609,11 @@ static int btrfs_setattr_size(struct inode *inode, struct iattr *attr)
 		i_size_write(inode, attr->ia_size);
 		btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
 
-		trans = btrfs_start_transaction(root, 1);
+		trans = btrfs_start_transaction(root, 0);
+		BUG_ON(IS_ERR(trans));
 		btrfs_set_trans_block_group(trans, inode);
+		trans->block_rsv = root->orphan_block_rsv;
+		BUG_ON(!trans->block_rsv);
 
 		ret = btrfs_update_inode(trans, root, inode);
 		BUG_ON(ret);
@@ -3391,10 +3693,21 @@ void btrfs_delete_inode(struct inode *inode)
 	btrfs_i_size_write(inode, 0);
 
 	while (1) {
-		trans = btrfs_start_transaction(root, 1);
+		trans = btrfs_start_transaction(root, 0);
+		BUG_ON(IS_ERR(trans));
 		btrfs_set_trans_block_group(trans, inode);
-		ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0);
+		trans->block_rsv = root->orphan_block_rsv;
 
+		ret = btrfs_block_rsv_check(trans, root,
+					    root->orphan_block_rsv, 0, 5);
+		if (ret) {
+			BUG_ON(ret != -EAGAIN);
+			ret = btrfs_commit_transaction(trans, root);
+			BUG_ON(ret);
+			continue;
+		}
+
+		ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0);
 		if (ret != -EAGAIN)
 			break;
 
@@ -3402,6 +3715,7 @@ void btrfs_delete_inode(struct inode *inode)
 		btrfs_end_transaction(trans, root);
 		trans = NULL;
 		btrfs_btree_balance_dirty(root, nr);
+
 	}
 
 	if (ret == 0) {
@@ -3642,40 +3956,10 @@ again:
 	return 0;
 }
 
-static noinline void init_btrfs_i(struct inode *inode)
-{
-	struct btrfs_inode *bi = BTRFS_I(inode);
-
-	bi->generation = 0;
-	bi->sequence = 0;
-	bi->last_trans = 0;
-	bi->last_sub_trans = 0;
-	bi->logged_trans = 0;
-	bi->delalloc_bytes = 0;
-	bi->reserved_bytes = 0;
-	bi->disk_i_size = 0;
-	bi->flags = 0;
-	bi->index_cnt = (u64)-1;
-	bi->last_unlink_trans = 0;
-	bi->ordered_data_close = 0;
-	bi->force_compress = 0;
-	extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS);
-	extent_io_tree_init(&BTRFS_I(inode)->io_tree,
-			     inode->i_mapping, GFP_NOFS);
-	extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree,
-			     inode->i_mapping, GFP_NOFS);
-	INIT_LIST_HEAD(&BTRFS_I(inode)->delalloc_inodes);
-	INIT_LIST_HEAD(&BTRFS_I(inode)->ordered_operations);
-	RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node);
-	btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree);
-	mutex_init(&BTRFS_I(inode)->log_mutex);
-}
-
 static int btrfs_init_locked_inode(struct inode *inode, void *p)
 {
 	struct btrfs_iget_args *args = p;
 	inode->i_ino = args->ino;
-	init_btrfs_i(inode);
 	BTRFS_I(inode)->root = args->root;
 	btrfs_set_inode_space_info(args->root, inode);
 	return 0;
@@ -3738,8 +4022,6 @@ static struct inode *new_simple_dir(struct super_block *s,
 	if (!inode)
 		return ERR_PTR(-ENOMEM);
 
-	init_btrfs_i(inode);
-
 	BTRFS_I(inode)->root = root;
 	memcpy(&BTRFS_I(inode)->location, key, sizeof(*key));
 	BTRFS_I(inode)->dummy_inode = 1;
@@ -3996,7 +4278,7 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
 	struct btrfs_trans_handle *trans;
 	int ret = 0;
 
-	if (root->fs_info->btree_inode == inode)
+	if (BTRFS_I(inode)->dummy_inode)
 		return 0;
 
 	if (wbc->sync_mode == WB_SYNC_ALL) {
@@ -4017,10 +4299,38 @@ void btrfs_dirty_inode(struct inode *inode)
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct btrfs_trans_handle *trans;
+	int ret;
+
+	if (BTRFS_I(inode)->dummy_inode)
+		return;
 
 	trans = btrfs_join_transaction(root, 1);
 	btrfs_set_trans_block_group(trans, inode);
-	btrfs_update_inode(trans, root, inode);
+
+	ret = btrfs_update_inode(trans, root, inode);
+	if (ret && ret == -ENOSPC) {
+		/* whoops, lets try again with the full transaction */
+		btrfs_end_transaction(trans, root);
+		trans = btrfs_start_transaction(root, 1);
+		if (IS_ERR(trans)) {
+			if (printk_ratelimit()) {
+				printk(KERN_ERR "btrfs: fail to "
+				       "dirty inode %lu error %ld\n",
+				       inode->i_ino, PTR_ERR(trans));
+			}
+			return;
+		}
+		btrfs_set_trans_block_group(trans, inode);
+
+		ret = btrfs_update_inode(trans, root, inode);
+		if (ret) {
+			if (printk_ratelimit()) {
+				printk(KERN_ERR "btrfs: fail to "
+				       "dirty inode %lu error %d\n",
+				       inode->i_ino, ret);
+			}
+		}
+	}
 	btrfs_end_transaction(trans, root);
 }
 
@@ -4138,7 +4448,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
 	 * btrfs_get_inode_index_count has an explanation for the magic
 	 * number
 	 */
-	init_btrfs_i(inode);
 	BTRFS_I(inode)->index_cnt = 2;
 	BTRFS_I(inode)->root = root;
 	BTRFS_I(inode)->generation = trans->transid;
@@ -4167,16 +4476,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
 	if (ret != 0)
 		goto fail;
 
-	inode->i_uid = current_fsuid();
-
-	if (dir && (dir->i_mode & S_ISGID)) {
-		inode->i_gid = dir->i_gid;
-		if (S_ISDIR(mode))
-			mode |= S_ISGID;
-	} else
-		inode->i_gid = current_fsgid();
-
-	inode->i_mode = mode;
+	inode_init_owner(inode, dir, mode);
 	inode->i_ino = objectid;
 	inode_set_bytes(inode, 0);
 	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
@@ -4302,26 +4602,21 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
 	if (!new_valid_dev(rdev))
 		return -EINVAL;
 
+	err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid);
+	if (err)
+		return err;
+
 	/*
 	 * 2 for inode item and ref
 	 * 2 for dir items
 	 * 1 for xattr if selinux is on
 	 */
-	err = btrfs_reserve_metadata_space(root, 5);
-	if (err)
-		return err;
+	trans = btrfs_start_transaction(root, 5);
+	if (IS_ERR(trans))
+		return PTR_ERR(trans);
 
-	trans = btrfs_start_transaction(root, 1);
-	if (!trans)
-		goto fail;
 	btrfs_set_trans_block_group(trans, dir);
 
-	err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
-	if (err) {
-		err = -ENOSPC;
-		goto out_unlock;
-	}
-
 	inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
 				dentry->d_name.len,
 				dentry->d_parent->d_inode->i_ino, objectid,
@@ -4350,13 +4645,11 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
 out_unlock:
 	nr = trans->blocks_used;
 	btrfs_end_transaction_throttle(trans, root);
-fail:
-	btrfs_unreserve_metadata_space(root, 5);
+	btrfs_btree_balance_dirty(root, nr);
 	if (drop_inode) {
 		inode_dec_link_count(inode);
 		iput(inode);
 	}
-	btrfs_btree_balance_dirty(root, nr);
 	return err;
 }
 
@@ -4366,32 +4659,26 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
 	struct btrfs_trans_handle *trans;
 	struct btrfs_root *root = BTRFS_I(dir)->root;
 	struct inode *inode = NULL;
-	int err;
 	int drop_inode = 0;
+	int err;
 	unsigned long nr = 0;
 	u64 objectid;
 	u64 index = 0;
 
+	err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid);
+	if (err)
+		return err;
 	/*
 	 * 2 for inode item and ref
 	 * 2 for dir items
 	 * 1 for xattr if selinux is on
 	 */
-	err = btrfs_reserve_metadata_space(root, 5);
-	if (err)
-		return err;
+	trans = btrfs_start_transaction(root, 5);
+	if (IS_ERR(trans))
+		return PTR_ERR(trans);
 
-	trans = btrfs_start_transaction(root, 1);
-	if (!trans)
-		goto fail;
 	btrfs_set_trans_block_group(trans, dir);
 
-	err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
-	if (err) {
-		err = -ENOSPC;
-		goto out_unlock;
-	}
-
 	inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
 				dentry->d_name.len,
 				dentry->d_parent->d_inode->i_ino,
@@ -4423,8 +4710,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
 out_unlock:
 	nr = trans->blocks_used;
 	btrfs_end_transaction_throttle(trans, root);
-fail:
-	btrfs_unreserve_metadata_space(root, 5);
 	if (drop_inode) {
 		inode_dec_link_count(inode);
 		iput(inode);
@@ -4451,21 +4736,21 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
4451 if (root->objectid != BTRFS_I(inode)->root->objectid) 4736 if (root->objectid != BTRFS_I(inode)->root->objectid)
4452 return -EPERM; 4737 return -EPERM;
4453 4738
4454 /*
4455 * 1 item for inode ref
4456 * 2 items for dir items
4457 */
4458 err = btrfs_reserve_metadata_space(root, 3);
4459 if (err)
4460 return err;
4461
4462 btrfs_inc_nlink(inode); 4739 btrfs_inc_nlink(inode);
4463 4740
4464 err = btrfs_set_inode_index(dir, &index); 4741 err = btrfs_set_inode_index(dir, &index);
4465 if (err) 4742 if (err)
4466 goto fail; 4743 goto fail;
4467 4744
4468 trans = btrfs_start_transaction(root, 1); 4745 /*
4746 * 1 item for inode ref
4747 * 2 items for dir items
4748 */
4749 trans = btrfs_start_transaction(root, 3);
4750 if (IS_ERR(trans)) {
4751 err = PTR_ERR(trans);
4752 goto fail;
4753 }
4469 4754
4470 btrfs_set_trans_block_group(trans, dir); 4755 btrfs_set_trans_block_group(trans, dir);
4471 atomic_inc(&inode->i_count); 4756 atomic_inc(&inode->i_count);
@@ -4484,7 +4769,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
4484 nr = trans->blocks_used; 4769 nr = trans->blocks_used;
4485 btrfs_end_transaction_throttle(trans, root); 4770 btrfs_end_transaction_throttle(trans, root);
4486fail: 4771fail:
4487 btrfs_unreserve_metadata_space(root, 3);
4488 if (drop_inode) { 4772 if (drop_inode) {
4489 inode_dec_link_count(inode); 4773 inode_dec_link_count(inode);
4490 iput(inode); 4774 iput(inode);
@@ -4504,28 +4788,20 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
4504 u64 index = 0; 4788 u64 index = 0;
4505 unsigned long nr = 1; 4789 unsigned long nr = 1;
4506 4790
4791 err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid);
4792 if (err)
4793 return err;
4794
4507 /* 4795 /*
4508 * 2 items for inode and ref 4796 * 2 items for inode and ref
4509 * 2 items for dir items 4797 * 2 items for dir items
4510 * 1 for xattr if selinux is on 4798 * 1 for xattr if selinux is on
4511 */ 4799 */
4512 err = btrfs_reserve_metadata_space(root, 5); 4800 trans = btrfs_start_transaction(root, 5);
4513 if (err) 4801 if (IS_ERR(trans))
4514 return err; 4802 return PTR_ERR(trans);
4515
4516 trans = btrfs_start_transaction(root, 1);
4517 if (!trans) {
4518 err = -ENOMEM;
4519 goto out_unlock;
4520 }
4521 btrfs_set_trans_block_group(trans, dir); 4803 btrfs_set_trans_block_group(trans, dir);
4522 4804
4523 err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
4524 if (err) {
4525 err = -ENOSPC;
4526 goto out_fail;
4527 }
4528
4529 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, 4805 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
4530 dentry->d_name.len, 4806 dentry->d_name.len,
4531 dentry->d_parent->d_inode->i_ino, objectid, 4807 dentry->d_parent->d_inode->i_ino, objectid,
@@ -4565,9 +4841,6 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
4565out_fail: 4841out_fail:
4566 nr = trans->blocks_used; 4842 nr = trans->blocks_used;
4567 btrfs_end_transaction_throttle(trans, root); 4843 btrfs_end_transaction_throttle(trans, root);
4568
4569out_unlock:
4570 btrfs_unreserve_metadata_space(root, 5);
4571 if (drop_on_err) 4844 if (drop_on_err)
4572 iput(inode); 4845 iput(inode);
4573 btrfs_btree_balance_dirty(root, nr); 4846 btrfs_btree_balance_dirty(root, nr);
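The pattern in the hunks above repeats throughout this change: the separate btrfs_reserve_metadata_space()/btrfs_unreserve_metadata_space() calls go away and the worst-case item count is handed straight to btrfs_start_transaction(), which now returns an ERR_PTR on failure, so the extra "fail:"/"out_unlock:" labels that existed only to undo the reservation can be dropped. A minimal user-space sketch of that shape follows; every name in it is invented for illustration and is not a btrfs API.

    /*
     * Toy model of "reservation folded into transaction start": the caller
     * passes its worst-case item count, start_transaction() takes the
     * reservation itself, and a failure leaves nothing to unwind.
     */
    #include <stdio.h>
    #include <stdlib.h>

    struct trans_handle {
    	int reserved_items;
    };

    static int metadata_pool = 8;		/* pretend global space counter */

    static struct trans_handle *start_transaction(int num_items)
    {
    	struct trans_handle *t;

    	if (metadata_pool < num_items)
    		return NULL;		/* stands in for ERR_PTR(-ENOSPC) */

    	t = malloc(sizeof(*t));
    	if (!t)
    		return NULL;
    	metadata_pool -= num_items;
    	t->reserved_items = num_items;
    	return t;
    }

    static void end_transaction(struct trans_handle *t)
    {
    	metadata_pool += t->reserved_items;	/* released on commit/abort */
    	free(t);
    }

    int main(void)
    {
    	/* 2 for inode item and ref, 2 for dir items, 1 for xattr */
    	struct trans_handle *t = start_transaction(5);

    	if (!t) {
    		fprintf(stderr, "no space\n");
    		return 1;
    	}
    	/* ... create the inode, add dir entries ... */
    	end_transaction(t);
    	return 0;
    }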
@@ -4825,6 +5098,7 @@ again:
4825 } 5098 }
4826 flush_dcache_page(page); 5099 flush_dcache_page(page);
4827 } else if (create && PageUptodate(page)) { 5100 } else if (create && PageUptodate(page)) {
5101 WARN_ON(1);
4828 if (!trans) { 5102 if (!trans) {
4829 kunmap(page); 5103 kunmap(page);
4830 free_extent_map(em); 5104 free_extent_map(em);
@@ -4921,11 +5195,651 @@ out:
4921 return em; 5195 return em;
4922} 5196}
4923 5197
5198static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
5199 u64 start, u64 len)
5200{
5201 struct btrfs_root *root = BTRFS_I(inode)->root;
5202 struct btrfs_trans_handle *trans;
5203 struct extent_map *em;
5204 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
5205 struct btrfs_key ins;
5206 u64 alloc_hint;
5207 int ret;
5208
5209 btrfs_drop_extent_cache(inode, start, start + len - 1, 0);
5210
5211 trans = btrfs_join_transaction(root, 0);
5212 if (!trans)
5213 return ERR_PTR(-ENOMEM);
5214
5215 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
5216
5217 alloc_hint = get_extent_allocation_hint(inode, start, len);
5218 ret = btrfs_reserve_extent(trans, root, len, root->sectorsize, 0,
5219 alloc_hint, (u64)-1, &ins, 1);
5220 if (ret) {
5221 em = ERR_PTR(ret);
5222 goto out;
5223 }
5224
5225 em = alloc_extent_map(GFP_NOFS);
5226 if (!em) {
5227 em = ERR_PTR(-ENOMEM);
5228 goto out;
5229 }
5230
5231 em->start = start;
5232 em->orig_start = em->start;
5233 em->len = ins.offset;
5234
5235 em->block_start = ins.objectid;
5236 em->block_len = ins.offset;
5237 em->bdev = root->fs_info->fs_devices->latest_bdev;
5238 set_bit(EXTENT_FLAG_PINNED, &em->flags);
5239
5240 while (1) {
5241 write_lock(&em_tree->lock);
5242 ret = add_extent_mapping(em_tree, em);
5243 write_unlock(&em_tree->lock);
5244 if (ret != -EEXIST)
5245 break;
5246 btrfs_drop_extent_cache(inode, start, start + em->len - 1, 0);
5247 }
5248
5249 ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid,
5250 ins.offset, ins.offset, 0);
5251 if (ret) {
5252 btrfs_free_reserved_extent(root, ins.objectid, ins.offset);
5253 em = ERR_PTR(ret);
5254 }
5255out:
5256 btrfs_end_transaction(trans, root);
5257 return em;
5258}
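The while loop in btrfs_new_extent_direct() retries add_extent_mapping() whenever it returns -EEXIST, first dropping whatever stale cached mapping still overlaps the range. A stand-alone sketch of that insert/drop/retry pattern, using invented types rather than the real extent_map machinery:

    #include <errno.h>
    #include <stdbool.h>
    #include <stdio.h>

    struct range { unsigned long start, len; bool used; };

    static struct range cache[8];

    static int add_mapping(unsigned long start, unsigned long len)
    {
    	int i, slot = -1;

    	for (i = 0; i < 8; i++) {
    		if (!cache[i].used) {
    			slot = i;
    			continue;
    		}
    		if (start < cache[i].start + cache[i].len &&
    		    cache[i].start < start + len)
    			return -EEXIST;		/* overlaps a cached range */
    	}
    	if (slot < 0)
    		return -ENOSPC;
    	cache[slot] = (struct range){ start, len, true };
    	return 0;
    }

    static void drop_cached_range(unsigned long start, unsigned long len)
    {
    	int i;

    	for (i = 0; i < 8; i++)
    		if (cache[i].used &&
    		    start < cache[i].start + cache[i].len &&
    		    cache[i].start < start + len)
    			cache[i].used = false;
    }

    int main(void)
    {
    	int ret;

    	add_mapping(0, 4096);			/* pretend stale mapping */
    	do {
    		ret = add_mapping(0, 8192);	/* new, larger mapping */
    		if (ret == -EEXIST)
    			drop_cached_range(0, 8192);
    	} while (ret == -EEXIST);

    	printf("insert %s\n", ret ? "failed" : "succeeded");
    	return 0;
    }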
5259
5260/*
5261 * returns 1 when the nocow is safe, < 1 on error, 0 if the
5262 * block must be cow'd
5263 */
5264static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans,
5265 struct inode *inode, u64 offset, u64 len)
5266{
5267 struct btrfs_path *path;
5268 int ret;
5269 struct extent_buffer *leaf;
5270 struct btrfs_root *root = BTRFS_I(inode)->root;
5271 struct btrfs_file_extent_item *fi;
5272 struct btrfs_key key;
5273 u64 disk_bytenr;
5274 u64 backref_offset;
5275 u64 extent_end;
5276 u64 num_bytes;
5277 int slot;
5278 int found_type;
5279
5280 path = btrfs_alloc_path();
5281 if (!path)
5282 return -ENOMEM;
5283
5284 ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino,
5285 offset, 0);
5286 if (ret < 0)
5287 goto out;
5288
5289 slot = path->slots[0];
5290 if (ret == 1) {
5291 if (slot == 0) {
5292 /* can't find the item, must cow */
5293 ret = 0;
5294 goto out;
5295 }
5296 slot--;
5297 }
5298 ret = 0;
5299 leaf = path->nodes[0];
5300 btrfs_item_key_to_cpu(leaf, &key, slot);
5301 if (key.objectid != inode->i_ino ||
5302 key.type != BTRFS_EXTENT_DATA_KEY) {
5303 /* not our file or wrong item type, must cow */
5304 goto out;
5305 }
5306
5307 if (key.offset > offset) {
5308 /* Wrong offset, must cow */
5309 goto out;
5310 }
5311
5312 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
5313 found_type = btrfs_file_extent_type(leaf, fi);
5314 if (found_type != BTRFS_FILE_EXTENT_REG &&
5315 found_type != BTRFS_FILE_EXTENT_PREALLOC) {
5316 /* not a regular extent, must cow */
5317 goto out;
5318 }
5319 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
5320 backref_offset = btrfs_file_extent_offset(leaf, fi);
5321
5322 extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
5323 if (extent_end < offset + len) {
5324 /* extent doesn't include our full range, must cow */
5325 goto out;
5326 }
5327
5328 if (btrfs_extent_readonly(root, disk_bytenr))
5329 goto out;
5330
5331 /*
5332 * look for other files referencing this extent, if we
5333 * find any we must cow
5334 */
5335 if (btrfs_cross_ref_exist(trans, root, inode->i_ino,
5336 key.offset - backref_offset, disk_bytenr))
5337 goto out;
5338
5339 /*
5340 * adjust disk_bytenr and num_bytes to cover just the bytes
5341 * in this extent we are about to write. If there
5342 * are any csums in that range we have to cow in order
5343 * to keep the csums correct
5344 */
5345 disk_bytenr += backref_offset;
5346 disk_bytenr += offset - key.offset;
5347 num_bytes = min(offset + len, extent_end) - offset;
5348 if (csum_exist_in_range(root, disk_bytenr, num_bytes))
5349 goto out;
5350 /*
5351 * all of the above have passed, it is safe to overwrite this extent
5352 * without cow
5353 */
5354 ret = 1;
5355out:
5356 btrfs_free_path(path);
5357 return ret;
5358}
5359
5360static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
5361 struct buffer_head *bh_result, int create)
5362{
5363 struct extent_map *em;
5364 struct btrfs_root *root = BTRFS_I(inode)->root;
5365 u64 start = iblock << inode->i_blkbits;
5366 u64 len = bh_result->b_size;
5367 struct btrfs_trans_handle *trans;
5368
5369 em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
5370 if (IS_ERR(em))
5371 return PTR_ERR(em);
5372
5373 /*
5374 * Ok for INLINE and COMPRESSED extents we need to fallback on buffered
5375 * io. INLINE is special, and we could probably kludge it in here, but
5376 * it's still buffered so for safety lets just fall back to the generic
5377 * buffered path.
5378 *
5379 * For COMPRESSED we _have_ to read the entire extent in so we can
5380 * decompress it, so there will be buffering required no matter what we
5381 * do, so go ahead and fallback to buffered.
5382 *
5383 * We return -ENOTBLK because that's what makes DIO go ahead and go back
5384 * to buffered IO. Don't blame me, this is the price we pay for using
5385 * the generic code.
5386 */
5387 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) ||
5388 em->block_start == EXTENT_MAP_INLINE) {
5389 free_extent_map(em);
5390 return -ENOTBLK;
5391 }
5392
5393 /* Just a good old fashioned hole, return */
5394 if (!create && (em->block_start == EXTENT_MAP_HOLE ||
5395 test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
5396 free_extent_map(em);
5397 /* DIO will do one hole at a time, so just unlock a sector */
5398 unlock_extent(&BTRFS_I(inode)->io_tree, start,
5399 start + root->sectorsize - 1, GFP_NOFS);
5400 return 0;
5401 }
5402
5403 /*
5404 * We don't allocate a new extent in the following cases
5405 *
5406 * 1) The inode is marked as NODATACOW. In this case we'll just use the
5407 * existing extent.
5408 * 2) The extent is marked as PREALLOC. We're good to go here and can
5409 * just use the extent.
5410 *
5411 */
5412 if (!create) {
5413 len = em->len - (start - em->start);
5414 goto map;
5415 }
5416
5417 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) ||
5418 ((BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) &&
5419 em->block_start != EXTENT_MAP_HOLE)) {
5420 int type;
5421 int ret;
5422 u64 block_start;
5423
5424 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
5425 type = BTRFS_ORDERED_PREALLOC;
5426 else
5427 type = BTRFS_ORDERED_NOCOW;
5428 len = min(len, em->len - (start - em->start));
5429 block_start = em->block_start + (start - em->start);
5430
5431 /*
5432 * we're not going to log anything, but we do need
5433 * to make sure the current transaction stays open
5434 * while we look for nocow cross refs
5435 */
5436 trans = btrfs_join_transaction(root, 0);
5437 if (!trans)
5438 goto must_cow;
5439
5440 if (can_nocow_odirect(trans, inode, start, len) == 1) {
5441 ret = btrfs_add_ordered_extent_dio(inode, start,
5442 block_start, len, len, type);
5443 btrfs_end_transaction(trans, root);
5444 if (ret) {
5445 free_extent_map(em);
5446 return ret;
5447 }
5448 goto unlock;
5449 }
5450 btrfs_end_transaction(trans, root);
5451 }
5452must_cow:
5453 /*
5454 * this will cow the extent, reset the len in case we changed
5455 * it above
5456 */
5457 len = bh_result->b_size;
5458 free_extent_map(em);
5459 em = btrfs_new_extent_direct(inode, start, len);
5460 if (IS_ERR(em))
5461 return PTR_ERR(em);
5462 len = min(len, em->len - (start - em->start));
5463unlock:
5464 clear_extent_bit(&BTRFS_I(inode)->io_tree, start, start + len - 1,
5465 EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DIRTY, 1,
5466 0, NULL, GFP_NOFS);
5467map:
5468 bh_result->b_blocknr = (em->block_start + (start - em->start)) >>
5469 inode->i_blkbits;
5470 bh_result->b_size = len;
5471 bh_result->b_bdev = em->bdev;
5472 set_buffer_mapped(bh_result);
5473 if (create && !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
5474 set_buffer_new(bh_result);
5475
5476 free_extent_map(em);
5477
5478 return 0;
5479}
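For a DIO write, btrfs_get_blocks_direct() skips allocation only when the extent is preallocated, or the inode is NODATACOW and the range maps to a real block, and can_nocow_odirect() confirms nothing else references the extent and no checksums cover the range; anything else goes through btrfs_new_extent_direct() and is COW'd. A simplified model of that decision, with invented types rather than kernel code:

    #include <stdbool.h>
    #include <stdio.h>

    enum extent_kind { EXTENT_HOLE, EXTENT_PREALLOC, EXTENT_REGULAR };

    struct fake_extent {
    	enum extent_kind kind;
    	bool inode_nodatacow;	/* inode flagged NODATACOW */
    	bool shared_or_csummed;	/* cross refs or csums exist in the range */
    };

    static const char *dio_write_strategy(const struct fake_extent *em)
    {
    	if (em->kind == EXTENT_PREALLOC ||
    	    (em->inode_nodatacow && em->kind != EXTENT_HOLE)) {
    		if (!em->shared_or_csummed)
    			return "write in place (nocow/prealloc)";
    	}
    	return "allocate new extent and COW";
    }

    int main(void)
    {
    	struct fake_extent prealloc = { EXTENT_PREALLOC, false, false };
    	struct fake_extent shared   = { EXTENT_REGULAR, true, true };

    	printf("%s\n", dio_write_strategy(&prealloc));
    	printf("%s\n", dio_write_strategy(&shared));
    	return 0;
    }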
5480
5481struct btrfs_dio_private {
5482 struct inode *inode;
5483 u64 logical_offset;
5484 u64 disk_bytenr;
5485 u64 bytes;
5486 u32 *csums;
5487 void *private;
5488};
5489
5490static void btrfs_endio_direct_read(struct bio *bio, int err)
5491{
5492 struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1;
5493 struct bio_vec *bvec = bio->bi_io_vec;
5494 struct btrfs_dio_private *dip = bio->bi_private;
5495 struct inode *inode = dip->inode;
5496 struct btrfs_root *root = BTRFS_I(inode)->root;
5497 u64 start;
5498 u32 *private = dip->csums;
5499
5500 start = dip->logical_offset;
5501 do {
5502 if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
5503 struct page *page = bvec->bv_page;
5504 char *kaddr;
5505 u32 csum = ~(u32)0;
5506 unsigned long flags;
5507
5508 local_irq_save(flags);
5509 kaddr = kmap_atomic(page, KM_IRQ0);
5510 csum = btrfs_csum_data(root, kaddr + bvec->bv_offset,
5511 csum, bvec->bv_len);
5512 btrfs_csum_final(csum, (char *)&csum);
5513 kunmap_atomic(kaddr, KM_IRQ0);
5514 local_irq_restore(flags);
5515
5516 flush_dcache_page(bvec->bv_page);
5517 if (csum != *private) {
5518 printk(KERN_ERR "btrfs csum failed ino %lu off"
5519 " %llu csum %u private %u\n",
5520 inode->i_ino, (unsigned long long)start,
5521 csum, *private);
5522 err = -EIO;
5523 }
5524 }
5525
5526 start += bvec->bv_len;
5527 private++;
5528 bvec++;
5529 } while (bvec <= bvec_end);
5530
5531 unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset,
5532 dip->logical_offset + dip->bytes - 1, GFP_NOFS);
5533 bio->bi_private = dip->private;
5534
5535 kfree(dip->csums);
5536 kfree(dip);
5537 dio_end_io(bio, err);
5538}
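btrfs_endio_direct_read() walks the bio one bio_vec at a time, checksums each page's data, and compares it against the csum looked up at submit time, flagging -EIO on a mismatch but still finishing the walk. The same per-chunk verify pattern in a self-contained user-space sketch; btrfs uses crc32c, while plain zlib crc32 is used here only to keep the example buildable (cc verify.c -lz):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>
    #include <zlib.h>

    #define CHUNK 4096

    static int verify_chunks(const unsigned char *buf, size_t len,
    			 const uint32_t *stored)
    {
    	size_t off;
    	int bad = 0;

    	for (off = 0; off < len; off += CHUNK, stored++) {
    		size_t n = len - off < CHUNK ? len - off : CHUNK;
    		uint32_t csum = crc32(0L, buf + off, n);

    		if (csum != *stored) {
    			fprintf(stderr, "csum failed at offset %zu\n", off);
    			bad = 1;	/* report, keep checking, like the endio loop */
    		}
    	}
    	return bad ? -1 : 0;
    }

    int main(void)
    {
    	unsigned char data[2 * CHUNK];
    	uint32_t sums[2];

    	memset(data, 0xab, sizeof(data));
    	sums[0] = crc32(0L, data, CHUNK);
    	sums[1] = crc32(0L, data + CHUNK, CHUNK);

    	return verify_chunks(data, sizeof(data), sums) ? 1 : 0;
    }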
5539
5540static void btrfs_endio_direct_write(struct bio *bio, int err)
5541{
5542 struct btrfs_dio_private *dip = bio->bi_private;
5543 struct inode *inode = dip->inode;
5544 struct btrfs_root *root = BTRFS_I(inode)->root;
5545 struct btrfs_trans_handle *trans;
5546 struct btrfs_ordered_extent *ordered = NULL;
5547 struct extent_state *cached_state = NULL;
5548 int ret;
5549
5550 if (err)
5551 goto out_done;
5552
5553 ret = btrfs_dec_test_ordered_pending(inode, &ordered,
5554 dip->logical_offset, dip->bytes);
5555 if (!ret)
5556 goto out_done;
5557
5558 BUG_ON(!ordered);
5559
5560 trans = btrfs_join_transaction(root, 1);
5561 if (!trans) {
5562 err = -ENOMEM;
5563 goto out;
5564 }
5565 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
5566
5567 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) {
5568 ret = btrfs_ordered_update_i_size(inode, 0, ordered);
5569 if (!ret)
5570 ret = btrfs_update_inode(trans, root, inode);
5571 err = ret;
5572 goto out;
5573 }
5574
5575 lock_extent_bits(&BTRFS_I(inode)->io_tree, ordered->file_offset,
5576 ordered->file_offset + ordered->len - 1, 0,
5577 &cached_state, GFP_NOFS);
5578
5579 if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags)) {
5580 ret = btrfs_mark_extent_written(trans, inode,
5581 ordered->file_offset,
5582 ordered->file_offset +
5583 ordered->len);
5584 if (ret) {
5585 err = ret;
5586 goto out_unlock;
5587 }
5588 } else {
5589 ret = insert_reserved_file_extent(trans, inode,
5590 ordered->file_offset,
5591 ordered->start,
5592 ordered->disk_len,
5593 ordered->len,
5594 ordered->len,
5595 0, 0, 0,
5596 BTRFS_FILE_EXTENT_REG);
5597 unpin_extent_cache(&BTRFS_I(inode)->extent_tree,
5598 ordered->file_offset, ordered->len);
5599 if (ret) {
5600 err = ret;
5601 WARN_ON(1);
5602 goto out_unlock;
5603 }
5604 }
5605
5606 add_pending_csums(trans, inode, ordered->file_offset, &ordered->list);
5607 btrfs_ordered_update_i_size(inode, 0, ordered);
5608 btrfs_update_inode(trans, root, inode);
5609out_unlock:
5610 unlock_extent_cached(&BTRFS_I(inode)->io_tree, ordered->file_offset,
5611 ordered->file_offset + ordered->len - 1,
5612 &cached_state, GFP_NOFS);
5613out:
5614 btrfs_delalloc_release_metadata(inode, ordered->len);
5615 btrfs_end_transaction(trans, root);
5616 btrfs_put_ordered_extent(ordered);
5617 btrfs_put_ordered_extent(ordered);
5618out_done:
5619 bio->bi_private = dip->private;
5620
5621 kfree(dip->csums);
5622 kfree(dip);
5623 dio_end_io(bio, err);
5624}
5625
5626static int __btrfs_submit_bio_start_direct_io(struct inode *inode, int rw,
5627 struct bio *bio, int mirror_num,
5628 unsigned long bio_flags, u64 offset)
5629{
5630 int ret;
5631 struct btrfs_root *root = BTRFS_I(inode)->root;
5632 ret = btrfs_csum_one_bio(root, inode, bio, offset, 1);
5633 BUG_ON(ret);
5634 return 0;
5635}
5636
5637static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
5638 loff_t file_offset)
5639{
5640 struct btrfs_root *root = BTRFS_I(inode)->root;
5641 struct btrfs_dio_private *dip;
5642 struct bio_vec *bvec = bio->bi_io_vec;
5643 u64 start;
5644 int skip_sum;
5645 int write = rw & (1 << BIO_RW);
5646 int ret = 0;
5647
5648 skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
5649
5650 dip = kmalloc(sizeof(*dip), GFP_NOFS);
5651 if (!dip) {
5652 ret = -ENOMEM;
5653 goto free_ordered;
5654 }
5655 dip->csums = NULL;
5656
5657 if (!skip_sum) {
5658 dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS);
5659 if (!dip->csums) {
5660 ret = -ENOMEM;
5661 goto free_ordered;
5662 }
5663 }
5664
5665 dip->private = bio->bi_private;
5666 dip->inode = inode;
5667 dip->logical_offset = file_offset;
5668
5669 start = dip->logical_offset;
5670 dip->bytes = 0;
5671 do {
5672 dip->bytes += bvec->bv_len;
5673 bvec++;
5674 } while (bvec <= (bio->bi_io_vec + bio->bi_vcnt - 1));
5675
5676 dip->disk_bytenr = (u64)bio->bi_sector << 9;
5677 bio->bi_private = dip;
5678
5679 if (write)
5680 bio->bi_end_io = btrfs_endio_direct_write;
5681 else
5682 bio->bi_end_io = btrfs_endio_direct_read;
5683
5684 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
5685 if (ret)
5686 goto out_err;
5687
5688 if (write && !skip_sum) {
5689 ret = btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
5690 inode, rw, bio, 0, 0,
5691 dip->logical_offset,
5692 __btrfs_submit_bio_start_direct_io,
5693 __btrfs_submit_bio_done);
5694 if (ret)
5695 goto out_err;
5696 return;
5697 } else if (!skip_sum)
5698 btrfs_lookup_bio_sums_dio(root, inode, bio,
5699 dip->logical_offset, dip->csums);
5700
5701 ret = btrfs_map_bio(root, rw, bio, 0, 1);
5702 if (ret)
5703 goto out_err;
5704 return;
5705out_err:
5706 kfree(dip->csums);
5707 kfree(dip);
5708free_ordered:
5709 /*
5710 * If this is a write, we need to clean up the reserved space and kill
5711 * the ordered extent.
5712 */
5713 if (write) {
5714 struct btrfs_ordered_extent *ordered;
5715 ordered = btrfs_lookup_ordered_extent(inode,
5716 dip->logical_offset);
5717 if (!test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags) &&
5718 !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags))
5719 btrfs_free_reserved_extent(root, ordered->start,
5720 ordered->disk_len);
5721 btrfs_put_ordered_extent(ordered);
5722 btrfs_put_ordered_extent(ordered);
5723 }
5724 bio_endio(bio, ret);
5725}
5726
5727static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *iocb,
5728 const struct iovec *iov, loff_t offset,
5729 unsigned long nr_segs)
5730{
5731 int seg;
5732 size_t size;
5733 unsigned long addr;
5734 unsigned blocksize_mask = root->sectorsize - 1;
5735 ssize_t retval = -EINVAL;
5736 loff_t end = offset;
5737
5738 if (offset & blocksize_mask)
5739 goto out;
5740
5741 /* Check the memory alignment. Blocks cannot straddle pages */
5742 for (seg = 0; seg < nr_segs; seg++) {
5743 addr = (unsigned long)iov[seg].iov_base;
5744 size = iov[seg].iov_len;
5745 end += size;
5746 if ((addr & blocksize_mask) || (size & blocksize_mask))
5747 goto out;
5748 }
5749 retval = 0;
5750out:
5751 return retval;
5752}
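check_direct_IO() is why user-space O_DIRECT callers must align everything: the file offset, each iovec base and each iovec length have to be multiples of the sector size, otherwise btrfs_direct_IO() returns 0 and the request falls back to buffered IO. A minimal aligned read from user space, assuming a 4096-byte sector size:

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <unistd.h>

    #define ALIGN 4096

    int main(int argc, char **argv)
    {
    	void *buf;
    	int fd;
    	ssize_t n;

    	if (argc < 2)
    		return 1;

    	if (posix_memalign(&buf, ALIGN, ALIGN))	/* aligned user buffer */
    		return 1;

    	fd = open(argv[1], O_RDONLY | O_DIRECT);
    	if (fd < 0) {
    		perror("open");
    		return 1;
    	}

    	n = pread(fd, buf, ALIGN, 0);		/* aligned size and offset */
    	printf("read %zd bytes\n", n);

    	close(fd);
    	free(buf);
    	return 0;
    }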
4924static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, 5753static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
4925 const struct iovec *iov, loff_t offset, 5754 const struct iovec *iov, loff_t offset,
4926 unsigned long nr_segs) 5755 unsigned long nr_segs)
4927{ 5756{
4928 return -EINVAL; 5757 struct file *file = iocb->ki_filp;
5758 struct inode *inode = file->f_mapping->host;
5759 struct btrfs_ordered_extent *ordered;
5760 struct extent_state *cached_state = NULL;
5761 u64 lockstart, lockend;
5762 ssize_t ret;
5763 int writing = rw & WRITE;
5764 int write_bits = 0;
5765 size_t count = iov_length(iov, nr_segs);
5766
5767 if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iov,
5768 offset, nr_segs)) {
5769 return 0;
5770 }
5771
5772 lockstart = offset;
5773 lockend = offset + count - 1;
5774
5775 if (writing) {
5776 ret = btrfs_delalloc_reserve_space(inode, count);
5777 if (ret)
5778 goto out;
5779 }
5780
5781 while (1) {
5782 lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
5783 0, &cached_state, GFP_NOFS);
5784 /*
5785 * We're concerned with the entire range that we're going to be
5786 * doing DIO to, so we need to make sure theres no ordered
5787 * extents in this range.
5788 */
5789 ordered = btrfs_lookup_ordered_range(inode, lockstart,
5790 lockend - lockstart + 1);
5791 if (!ordered)
5792 break;
5793 unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
5794 &cached_state, GFP_NOFS);
5795 btrfs_start_ordered_extent(inode, ordered, 1);
5796 btrfs_put_ordered_extent(ordered);
5797 cond_resched();
5798 }
5799
5800 /*
5801 * we don't use btrfs_set_extent_delalloc because we don't want
5802 * the dirty or uptodate bits
5803 */
5804 if (writing) {
5805 write_bits = EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING;
5806 ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
5807 EXTENT_DELALLOC, 0, NULL, &cached_state,
5808 GFP_NOFS);
5809 if (ret) {
5810 clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
5811 lockend, EXTENT_LOCKED | write_bits,
5812 1, 0, &cached_state, GFP_NOFS);
5813 goto out;
5814 }
5815 }
5816
5817 free_extent_state(cached_state);
5818 cached_state = NULL;
5819
5820 ret = __blockdev_direct_IO(rw, iocb, inode,
5821 BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev,
5822 iov, offset, nr_segs, btrfs_get_blocks_direct, NULL,
5823 btrfs_submit_direct, 0);
5824
5825 if (ret < 0 && ret != -EIOCBQUEUED) {
5826 clear_extent_bit(&BTRFS_I(inode)->io_tree, offset,
5827 offset + iov_length(iov, nr_segs) - 1,
5828 EXTENT_LOCKED | write_bits, 1, 0,
5829 &cached_state, GFP_NOFS);
5830 } else if (ret >= 0 && ret < iov_length(iov, nr_segs)) {
5831 /*
5832 * We're falling back to buffered, unlock the section we didn't
5833 * do IO on.
5834 */
5835 clear_extent_bit(&BTRFS_I(inode)->io_tree, offset + ret,
5836 offset + iov_length(iov, nr_segs) - 1,
5837 EXTENT_LOCKED | write_bits, 1, 0,
5838 &cached_state, GFP_NOFS);
5839 }
5840out:
5841 free_extent_state(cached_state);
5842 return ret;
4929} 5843}
4930 5844
4931static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 5845static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
@@ -5089,7 +6003,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
5089 u64 page_start; 6003 u64 page_start;
5090 u64 page_end; 6004 u64 page_end;
5091 6005
5092 ret = btrfs_check_data_free_space(root, inode, PAGE_CACHE_SIZE); 6006 ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
5093 if (ret) { 6007 if (ret) {
5094 if (ret == -ENOMEM) 6008 if (ret == -ENOMEM)
5095 ret = VM_FAULT_OOM; 6009 ret = VM_FAULT_OOM;
@@ -5098,13 +6012,6 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
5098 goto out; 6012 goto out;
5099 } 6013 }
5100 6014
5101 ret = btrfs_reserve_metadata_for_delalloc(root, inode, 1);
5102 if (ret) {
5103 btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE);
5104 ret = VM_FAULT_SIGBUS;
5105 goto out;
5106 }
5107
5108 ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */ 6015 ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */
5109again: 6016again:
5110 lock_page(page); 6017 lock_page(page);
@@ -5114,7 +6021,6 @@ again:
5114 6021
5115 if ((page->mapping != inode->i_mapping) || 6022 if ((page->mapping != inode->i_mapping) ||
5116 (page_start >= size)) { 6023 (page_start >= size)) {
5117 btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE);
5118 /* page got truncated out from underneath us */ 6024 /* page got truncated out from underneath us */
5119 goto out_unlock; 6025 goto out_unlock;
5120 } 6026 }
@@ -5155,7 +6061,6 @@ again:
5155 unlock_extent_cached(io_tree, page_start, page_end, 6061 unlock_extent_cached(io_tree, page_start, page_end,
5156 &cached_state, GFP_NOFS); 6062 &cached_state, GFP_NOFS);
5157 ret = VM_FAULT_SIGBUS; 6063 ret = VM_FAULT_SIGBUS;
5158 btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE);
5159 goto out_unlock; 6064 goto out_unlock;
5160 } 6065 }
5161 ret = 0; 6066 ret = 0;
@@ -5182,10 +6087,10 @@ again:
5182 unlock_extent_cached(io_tree, page_start, page_end, &cached_state, GFP_NOFS); 6087 unlock_extent_cached(io_tree, page_start, page_end, &cached_state, GFP_NOFS);
5183 6088
5184out_unlock: 6089out_unlock:
5185 btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
5186 if (!ret) 6090 if (!ret)
5187 return VM_FAULT_LOCKED; 6091 return VM_FAULT_LOCKED;
5188 unlock_page(page); 6092 unlock_page(page);
6093 btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
5189out: 6094out:
5190 return ret; 6095 return ret;
5191} 6096}
@@ -5210,8 +6115,10 @@ static void btrfs_truncate(struct inode *inode)
5210 btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); 6115 btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1);
5211 btrfs_ordered_update_i_size(inode, inode->i_size, NULL); 6116 btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
5212 6117
5213 trans = btrfs_start_transaction(root, 1); 6118 trans = btrfs_start_transaction(root, 0);
6119 BUG_ON(IS_ERR(trans));
5214 btrfs_set_trans_block_group(trans, inode); 6120 btrfs_set_trans_block_group(trans, inode);
6121 trans->block_rsv = root->orphan_block_rsv;
5215 6122
5216 /* 6123 /*
5217 * setattr is responsible for setting the ordered_data_close flag, 6124 * setattr is responsible for setting the ordered_data_close flag,
@@ -5234,6 +6141,23 @@ static void btrfs_truncate(struct inode *inode)
5234 btrfs_add_ordered_operation(trans, root, inode); 6141 btrfs_add_ordered_operation(trans, root, inode);
5235 6142
5236 while (1) { 6143 while (1) {
6144 if (!trans) {
6145 trans = btrfs_start_transaction(root, 0);
6146 BUG_ON(IS_ERR(trans));
6147 btrfs_set_trans_block_group(trans, inode);
6148 trans->block_rsv = root->orphan_block_rsv;
6149 }
6150
6151 ret = btrfs_block_rsv_check(trans, root,
6152 root->orphan_block_rsv, 0, 5);
6153 if (ret) {
6154 BUG_ON(ret != -EAGAIN);
6155 ret = btrfs_commit_transaction(trans, root);
6156 BUG_ON(ret);
6157 trans = NULL;
6158 continue;
6159 }
6160
5237 ret = btrfs_truncate_inode_items(trans, root, inode, 6161 ret = btrfs_truncate_inode_items(trans, root, inode,
5238 inode->i_size, 6162 inode->i_size,
5239 BTRFS_EXTENT_DATA_KEY); 6163 BTRFS_EXTENT_DATA_KEY);
@@ -5245,10 +6169,8 @@ static void btrfs_truncate(struct inode *inode)
5245 6169
5246 nr = trans->blocks_used; 6170 nr = trans->blocks_used;
5247 btrfs_end_transaction(trans, root); 6171 btrfs_end_transaction(trans, root);
6172 trans = NULL;
5248 btrfs_btree_balance_dirty(root, nr); 6173 btrfs_btree_balance_dirty(root, nr);
5249
5250 trans = btrfs_start_transaction(root, 1);
5251 btrfs_set_trans_block_group(trans, inode);
5252 } 6174 }
5253 6175
5254 if (ret == 0 && inode->i_nlink > 0) { 6176 if (ret == 0 && inode->i_nlink > 0) {
@@ -5309,21 +6231,47 @@ unsigned long btrfs_force_ra(struct address_space *mapping,
5309struct inode *btrfs_alloc_inode(struct super_block *sb) 6231struct inode *btrfs_alloc_inode(struct super_block *sb)
5310{ 6232{
5311 struct btrfs_inode *ei; 6233 struct btrfs_inode *ei;
6234 struct inode *inode;
5312 6235
5313 ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS); 6236 ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS);
5314 if (!ei) 6237 if (!ei)
5315 return NULL; 6238 return NULL;
6239
6240 ei->root = NULL;
6241 ei->space_info = NULL;
6242 ei->generation = 0;
6243 ei->sequence = 0;
5316 ei->last_trans = 0; 6244 ei->last_trans = 0;
5317 ei->last_sub_trans = 0; 6245 ei->last_sub_trans = 0;
5318 ei->logged_trans = 0; 6246 ei->logged_trans = 0;
5319 ei->outstanding_extents = 0; 6247 ei->delalloc_bytes = 0;
5320 ei->reserved_extents = 0; 6248 ei->reserved_bytes = 0;
5321 ei->root = NULL; 6249 ei->disk_i_size = 0;
6250 ei->flags = 0;
6251 ei->index_cnt = (u64)-1;
6252 ei->last_unlink_trans = 0;
6253
5322 spin_lock_init(&ei->accounting_lock); 6254 spin_lock_init(&ei->accounting_lock);
6255 atomic_set(&ei->outstanding_extents, 0);
6256 ei->reserved_extents = 0;
6257
6258 ei->ordered_data_close = 0;
6259 ei->orphan_meta_reserved = 0;
6260 ei->dummy_inode = 0;
6261 ei->force_compress = 0;
6262
6263 inode = &ei->vfs_inode;
6264 extent_map_tree_init(&ei->extent_tree, GFP_NOFS);
6265 extent_io_tree_init(&ei->io_tree, &inode->i_data, GFP_NOFS);
6266 extent_io_tree_init(&ei->io_failure_tree, &inode->i_data, GFP_NOFS);
6267 mutex_init(&ei->log_mutex);
5323 btrfs_ordered_inode_tree_init(&ei->ordered_tree); 6268 btrfs_ordered_inode_tree_init(&ei->ordered_tree);
5324 INIT_LIST_HEAD(&ei->i_orphan); 6269 INIT_LIST_HEAD(&ei->i_orphan);
6270 INIT_LIST_HEAD(&ei->delalloc_inodes);
5325 INIT_LIST_HEAD(&ei->ordered_operations); 6271 INIT_LIST_HEAD(&ei->ordered_operations);
5326 return &ei->vfs_inode; 6272 RB_CLEAR_NODE(&ei->rb_node);
6273
6274 return inode;
5327} 6275}
5328 6276
5329void btrfs_destroy_inode(struct inode *inode) 6277void btrfs_destroy_inode(struct inode *inode)
@@ -5333,6 +6281,8 @@ void btrfs_destroy_inode(struct inode *inode)
5333 6281
5334 WARN_ON(!list_empty(&inode->i_dentry)); 6282 WARN_ON(!list_empty(&inode->i_dentry));
5335 WARN_ON(inode->i_data.nrpages); 6283 WARN_ON(inode->i_data.nrpages);
6284 WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents));
6285 WARN_ON(BTRFS_I(inode)->reserved_extents);
5336 6286
5337 /* 6287 /*
5338 * This can happen where we create an inode, but somebody else also 6288 * This can happen where we create an inode, but somebody else also
@@ -5353,13 +6303,13 @@ void btrfs_destroy_inode(struct inode *inode)
5353 spin_unlock(&root->fs_info->ordered_extent_lock); 6303 spin_unlock(&root->fs_info->ordered_extent_lock);
5354 } 6304 }
5355 6305
5356 spin_lock(&root->list_lock); 6306 spin_lock(&root->orphan_lock);
5357 if (!list_empty(&BTRFS_I(inode)->i_orphan)) { 6307 if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
5358 printk(KERN_INFO "BTRFS: inode %lu still on the orphan list\n", 6308 printk(KERN_INFO "BTRFS: inode %lu still on the orphan list\n",
5359 inode->i_ino); 6309 inode->i_ino);
5360 list_del_init(&BTRFS_I(inode)->i_orphan); 6310 list_del_init(&BTRFS_I(inode)->i_orphan);
5361 } 6311 }
5362 spin_unlock(&root->list_lock); 6312 spin_unlock(&root->orphan_lock);
5363 6313
5364 while (1) { 6314 while (1) {
5365 ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1); 6315 ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1);
@@ -5384,7 +6334,6 @@ free:
5384void btrfs_drop_inode(struct inode *inode) 6334void btrfs_drop_inode(struct inode *inode)
5385{ 6335{
5386 struct btrfs_root *root = BTRFS_I(inode)->root; 6336 struct btrfs_root *root = BTRFS_I(inode)->root;
5387
5388 if (inode->i_nlink > 0 && btrfs_root_refs(&root->root_item) == 0) 6337 if (inode->i_nlink > 0 && btrfs_root_refs(&root->root_item) == 0)
5389 generic_delete_inode(inode); 6338 generic_delete_inode(inode);
5390 else 6339 else
@@ -5481,19 +6430,6 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
5481 if (S_ISDIR(old_inode->i_mode) && new_inode && 6430 if (S_ISDIR(old_inode->i_mode) && new_inode &&
5482 new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) 6431 new_inode->i_size > BTRFS_EMPTY_DIR_SIZE)
5483 return -ENOTEMPTY; 6432 return -ENOTEMPTY;
5484
5485 /*
5486 * We want to reserve the absolute worst case amount of items. So if
5487 * both inodes are subvols and we need to unlink them then that would
5488 * require 4 item modifications, but if they are both normal inodes it
5489 * would require 5 item modifications, so we'll assume their normal
5490 * inodes. So 5 * 2 is 10, plus 1 for the new link, so 11 total items
5491 * should cover the worst case number of items we'll modify.
5492 */
5493 ret = btrfs_reserve_metadata_space(root, 11);
5494 if (ret)
5495 return ret;
5496
5497 /* 6433 /*
5498 * we're using rename to replace one file with another. 6434 * we're using rename to replace one file with another.
5499 * and the replacement file is large. Start IO on it now so 6435 * and the replacement file is large. Start IO on it now so
@@ -5506,8 +6442,18 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
5506 /* close the racy window with snapshot create/destroy ioctl */ 6442 /* close the racy window with snapshot create/destroy ioctl */
5507 if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) 6443 if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)
5508 down_read(&root->fs_info->subvol_sem); 6444 down_read(&root->fs_info->subvol_sem);
6445 /*
6446 * We want to reserve the absolute worst case amount of items. So if
6447 * both inodes are subvols and we need to unlink them then that would
6448 * require 4 item modifications, but if they are both normal inodes it
6449 * would require 5 item modifications, so we'll assume their normal
6450 * inodes. So 5 * 2 is 10, plus 1 for the new link, so 11 total items
6451 * should cover the worst case number of items we'll modify.
6452 */
6453 trans = btrfs_start_transaction(root, 20);
6454 if (IS_ERR(trans))
6455 return PTR_ERR(trans);
5509 6456
5510 trans = btrfs_start_transaction(root, 1);
5511 btrfs_set_trans_block_group(trans, new_dir); 6457 btrfs_set_trans_block_group(trans, new_dir);
5512 6458
5513 if (dest != root) 6459 if (dest != root)
@@ -5606,7 +6552,6 @@ out_fail:
5606 if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) 6552 if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)
5607 up_read(&root->fs_info->subvol_sem); 6553 up_read(&root->fs_info->subvol_sem);
5608 6554
5609 btrfs_unreserve_metadata_space(root, 11);
5610 return ret; 6555 return ret;
5611} 6556}
5612 6557
@@ -5658,6 +6603,38 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
5658 return 0; 6603 return 0;
5659} 6604}
5660 6605
6606int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput)
6607{
6608 struct btrfs_inode *binode;
6609 struct inode *inode = NULL;
6610
6611 spin_lock(&root->fs_info->delalloc_lock);
6612 while (!list_empty(&root->fs_info->delalloc_inodes)) {
6613 binode = list_entry(root->fs_info->delalloc_inodes.next,
6614 struct btrfs_inode, delalloc_inodes);
6615 inode = igrab(&binode->vfs_inode);
6616 if (inode) {
6617 list_move_tail(&binode->delalloc_inodes,
6618 &root->fs_info->delalloc_inodes);
6619 break;
6620 }
6621
6622 list_del_init(&binode->delalloc_inodes);
6623 cond_resched_lock(&root->fs_info->delalloc_lock);
6624 }
6625 spin_unlock(&root->fs_info->delalloc_lock);
6626
6627 if (inode) {
6628 write_inode_now(inode, 0);
6629 if (delay_iput)
6630 btrfs_add_delayed_iput(inode);
6631 else
6632 iput(inode);
6633 return 1;
6634 }
6635 return 0;
6636}
6637
5661static int btrfs_symlink(struct inode *dir, struct dentry *dentry, 6638static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
5662 const char *symname) 6639 const char *symname)
5663{ 6640{
@@ -5681,26 +6658,20 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
5681 if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root)) 6658 if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root))
5682 return -ENAMETOOLONG; 6659 return -ENAMETOOLONG;
5683 6660
6661 err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid);
6662 if (err)
6663 return err;
5684 /* 6664 /*
5685 * 2 items for inode item and ref 6665 * 2 items for inode item and ref
5686 * 2 items for dir items 6666 * 2 items for dir items
5687 * 1 item for xattr if selinux is on 6667 * 1 item for xattr if selinux is on
5688 */ 6668 */
5689 err = btrfs_reserve_metadata_space(root, 5); 6669 trans = btrfs_start_transaction(root, 5);
5690 if (err) 6670 if (IS_ERR(trans))
5691 return err; 6671 return PTR_ERR(trans);
5692 6672
5693 trans = btrfs_start_transaction(root, 1);
5694 if (!trans)
5695 goto out_fail;
5696 btrfs_set_trans_block_group(trans, dir); 6673 btrfs_set_trans_block_group(trans, dir);
5697 6674
5698 err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
5699 if (err) {
5700 err = -ENOSPC;
5701 goto out_unlock;
5702 }
5703
5704 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, 6675 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
5705 dentry->d_name.len, 6676 dentry->d_name.len,
5706 dentry->d_parent->d_inode->i_ino, objectid, 6677 dentry->d_parent->d_inode->i_ino, objectid,
@@ -5772,8 +6743,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
5772out_unlock: 6743out_unlock:
5773 nr = trans->blocks_used; 6744 nr = trans->blocks_used;
5774 btrfs_end_transaction_throttle(trans, root); 6745 btrfs_end_transaction_throttle(trans, root);
5775out_fail:
5776 btrfs_unreserve_metadata_space(root, 5);
5777 if (drop_inode) { 6746 if (drop_inode) {
5778 inode_dec_link_count(inode); 6747 inode_dec_link_count(inode);
5779 iput(inode); 6748 iput(inode);
@@ -5782,36 +6751,28 @@ out_fail:
5782 return err; 6751 return err;
5783} 6752}
5784 6753
5785static int prealloc_file_range(struct inode *inode, u64 start, u64 end, 6754int btrfs_prealloc_file_range(struct inode *inode, int mode,
5786 u64 alloc_hint, int mode, loff_t actual_len) 6755 u64 start, u64 num_bytes, u64 min_size,
6756 loff_t actual_len, u64 *alloc_hint)
5787{ 6757{
5788 struct btrfs_trans_handle *trans; 6758 struct btrfs_trans_handle *trans;
5789 struct btrfs_root *root = BTRFS_I(inode)->root; 6759 struct btrfs_root *root = BTRFS_I(inode)->root;
5790 struct btrfs_key ins; 6760 struct btrfs_key ins;
5791 u64 alloc_size;
5792 u64 cur_offset = start; 6761 u64 cur_offset = start;
5793 u64 num_bytes = end - start;
5794 int ret = 0; 6762 int ret = 0;
5795 u64 i_size;
5796 6763
5797 while (num_bytes > 0) { 6764 while (num_bytes > 0) {
5798 alloc_size = min(num_bytes, root->fs_info->max_extent); 6765 trans = btrfs_start_transaction(root, 3);
5799 6766 if (IS_ERR(trans)) {
5800 trans = btrfs_start_transaction(root, 1); 6767 ret = PTR_ERR(trans);
5801 6768 break;
5802 ret = btrfs_reserve_extent(trans, root, alloc_size,
5803 root->sectorsize, 0, alloc_hint,
5804 (u64)-1, &ins, 1);
5805 if (ret) {
5806 WARN_ON(1);
5807 goto stop_trans;
5808 } 6769 }
5809 6770
5810 ret = btrfs_reserve_metadata_space(root, 3); 6771 ret = btrfs_reserve_extent(trans, root, num_bytes, min_size,
6772 0, *alloc_hint, (u64)-1, &ins, 1);
5811 if (ret) { 6773 if (ret) {
5812 btrfs_free_reserved_extent(root, ins.objectid, 6774 btrfs_end_transaction(trans, root);
5813 ins.offset); 6775 break;
5814 goto stop_trans;
5815 } 6776 }
5816 6777
5817 ret = insert_reserved_file_extent(trans, inode, 6778 ret = insert_reserved_file_extent(trans, inode,
@@ -5825,34 +6786,27 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end,
5825 6786
5826 num_bytes -= ins.offset; 6787 num_bytes -= ins.offset;
5827 cur_offset += ins.offset; 6788 cur_offset += ins.offset;
5828 alloc_hint = ins.objectid + ins.offset; 6789 *alloc_hint = ins.objectid + ins.offset;
5829 6790
5830 inode->i_ctime = CURRENT_TIME; 6791 inode->i_ctime = CURRENT_TIME;
5831 BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC; 6792 BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC;
5832 if (!(mode & FALLOC_FL_KEEP_SIZE) && 6793 if (!(mode & FALLOC_FL_KEEP_SIZE) &&
5833 (actual_len > inode->i_size) && 6794 (actual_len > inode->i_size) &&
5834 (cur_offset > inode->i_size)) { 6795 (cur_offset > inode->i_size)) {
5835
5836 if (cur_offset > actual_len) 6796 if (cur_offset > actual_len)
5837 i_size = actual_len; 6797 i_size_write(inode, actual_len);
5838 else 6798 else
5839 i_size = cur_offset; 6799 i_size_write(inode, cur_offset);
5840 i_size_write(inode, i_size); 6800 i_size_write(inode, cur_offset);
5841 btrfs_ordered_update_i_size(inode, i_size, NULL); 6801 btrfs_ordered_update_i_size(inode, cur_offset, NULL);
5842 } 6802 }
5843 6803
5844 ret = btrfs_update_inode(trans, root, inode); 6804 ret = btrfs_update_inode(trans, root, inode);
5845 BUG_ON(ret); 6805 BUG_ON(ret);
5846 6806
5847 btrfs_end_transaction(trans, root); 6807 btrfs_end_transaction(trans, root);
5848 btrfs_unreserve_metadata_space(root, 3);
5849 } 6808 }
5850 return ret; 6809 return ret;
5851
5852stop_trans:
5853 btrfs_end_transaction(trans, root);
5854 return ret;
5855
5856} 6810}
5857 6811
5858static long btrfs_fallocate(struct inode *inode, int mode, 6812static long btrfs_fallocate(struct inode *inode, int mode,
@@ -5885,8 +6839,7 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5885 goto out; 6839 goto out;
5886 } 6840 }
5887 6841
5888 ret = btrfs_check_data_free_space(BTRFS_I(inode)->root, inode, 6842 ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start);
5889 alloc_end - alloc_start);
5890 if (ret) 6843 if (ret)
5891 goto out; 6844 goto out;
5892 6845
@@ -5931,16 +6884,16 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5931 if (em->block_start == EXTENT_MAP_HOLE || 6884 if (em->block_start == EXTENT_MAP_HOLE ||
5932 (cur_offset >= inode->i_size && 6885 (cur_offset >= inode->i_size &&
5933 !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { 6886 !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
5934 ret = prealloc_file_range(inode, 6887 ret = btrfs_prealloc_file_range(inode, mode, cur_offset,
5935 cur_offset, last_byte, 6888 last_byte - cur_offset,
5936 alloc_hint, mode, offset+len); 6889 1 << inode->i_blkbits,
6890 offset + len,
6891 &alloc_hint);
5937 if (ret < 0) { 6892 if (ret < 0) {
5938 free_extent_map(em); 6893 free_extent_map(em);
5939 break; 6894 break;
5940 } 6895 }
5941 } 6896 }
5942 if (em->block_start <= EXTENT_MAP_LAST_BYTE)
5943 alloc_hint = em->block_start;
5944 free_extent_map(em); 6897 free_extent_map(em);
5945 6898
5946 cur_offset = last_byte; 6899 cur_offset = last_byte;
@@ -5952,8 +6905,7 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5952 unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, 6905 unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
5953 &cached_state, GFP_NOFS); 6906 &cached_state, GFP_NOFS);
5954 6907
5955 btrfs_free_reserved_data_space(BTRFS_I(inode)->root, inode, 6908 btrfs_free_reserved_data_space(inode, alloc_end - alloc_start);
5956 alloc_end - alloc_start);
5957out: 6909out:
5958 mutex_unlock(&inode->i_mutex); 6910 mutex_unlock(&inode->i_mutex);
5959 return ret; 6911 return ret;
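From user space this preallocation path is reached through fallocate(2); FALLOC_FL_KEEP_SIZE asks for PREALLOC extents past i_size without growing the file, which is exactly the case the i_size handling in btrfs_prealloc_file_range() is careful about. A short sketch, with error handling trimmed:

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
    	int fd = open("prealloc.dat", O_RDWR | O_CREAT, 0644);

    	if (fd < 0) {
    		perror("open");
    		return 1;
    	}

    	/* reserve 16 MiB of on-disk space without changing the file size */
    	if (fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, 16 << 20))
    		perror("fallocate");

    	close(fd);
    	return 0;
    }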