path: root/fs/btrfs/inode.c
author    Michal Marek <mmarek@suse.cz>  2010-10-27 18:15:57 -0400
committer Michal Marek <mmarek@suse.cz>  2010-10-27 18:15:57 -0400
commit    b74b953b998bcc2db91b694446f3a2619ec32de6
tree      6ce24caabd730f6ae9287ed0676ec32e6ff31e9d  /fs/btrfs/inode.c
parent    abb438526201c6a79949ad45375c051b6681c253
parent    f6f94e2ab1b33f0082ac22d71f66385a60d8157f
Merge commit 'v2.6.36' into kbuild/misc
Update to be able to fix a recent change to scripts/basic/docproc.c (commit eda603f).
Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r--  fs/btrfs/inode.c  1997
1 file changed, 1474 insertions(+), 523 deletions(-)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 8cd109972fa6..c03864406af3 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -36,6 +36,7 @@
 #include <linux/xattr.h>
 #include <linux/posix_acl.h>
 #include <linux/falloc.h>
+#include <linux/slab.h>
 #include "compat.h"
 #include "ctree.h"
 #include "disk-io.h"
@@ -251,6 +252,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
 				   inline_len, compressed_size,
 				   compressed_pages);
 	BUG_ON(ret);
+	btrfs_delalloc_release_metadata(inode, end + 1 - start);
 	btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0);
 	return 0;
 }
@@ -379,7 +381,8 @@ again:
	 * change at any time if we discover bad compression ratios.
	 */
	if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS) &&
-	    btrfs_test_opt(root, COMPRESS)) {
+	    (btrfs_test_opt(root, COMPRESS) ||
+	     (BTRFS_I(inode)->force_compress))) {
		WARN_ON(pages);
		pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS);

@@ -412,6 +415,7 @@ again:
		trans = btrfs_join_transaction(root, 1);
		BUG_ON(!trans);
		btrfs_set_trans_block_group(trans, inode);
+		trans->block_rsv = &root->fs_info->delalloc_block_rsv;

		/* lets try to make an inline extent */
		if (ret || total_in < (actual_end - start)) {
@@ -437,7 +441,6 @@ again:
				     start, end, NULL,
				     EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY |
				     EXTENT_CLEAR_DELALLOC |
-				     EXTENT_CLEAR_ACCOUNTING |
				     EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK);

			btrfs_end_transaction(trans, root);
@@ -483,8 +486,10 @@ again:
		nr_pages_ret = 0;

		/* flag the file so we don't compress in the future */
-		if (!btrfs_test_opt(root, FORCE_COMPRESS))
+		if (!btrfs_test_opt(root, FORCE_COMPRESS) &&
+		    !(BTRFS_I(inode)->force_compress)) {
			BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS;
+		}
	}
	if (will_compress) {
		*num_added += 1;
@@ -570,8 +575,8 @@ retry:
			unsigned long nr_written = 0;

			lock_extent(io_tree, async_extent->start,
-				    async_extent->start +
-				    async_extent->ram_size - 1, GFP_NOFS);
+					 async_extent->start +
+					 async_extent->ram_size - 1, GFP_NOFS);

			/* allocate blocks */
			ret = cow_file_range(inode, async_cow->locked_page,
@@ -693,6 +698,38 @@ retry:
	return 0;
 }

+static u64 get_extent_allocation_hint(struct inode *inode, u64 start,
+				      u64 num_bytes)
+{
+	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+	struct extent_map *em;
+	u64 alloc_hint = 0;
+
+	read_lock(&em_tree->lock);
+	em = search_extent_mapping(em_tree, start, num_bytes);
+	if (em) {
+		/*
+		 * if block start isn't an actual block number then find the
+		 * first block in this inode and use that as a hint. If that
+		 * block is also bogus then just don't worry about it.
+		 */
+		if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
+			free_extent_map(em);
+			em = search_extent_mapping(em_tree, 0, 0);
+			if (em && em->block_start < EXTENT_MAP_LAST_BYTE)
+				alloc_hint = em->block_start;
+			if (em)
+				free_extent_map(em);
+		} else {
+			alloc_hint = em->block_start;
+			free_extent_map(em);
+		}
+	}
+	read_unlock(&em_tree->lock);
+
+	return alloc_hint;
+}
+
 /*
  * when extent_io.c finds a delayed allocation range in the file,
  * the call backs end up in this code. The basic idea is to
@@ -730,6 +767,7 @@ static noinline int cow_file_range(struct inode *inode,
	trans = btrfs_join_transaction(root, 1);
	BUG_ON(!trans);
	btrfs_set_trans_block_group(trans, inode);
+	trans->block_rsv = &root->fs_info->delalloc_block_rsv;

	actual_end = min_t(u64, isize, end + 1);

@@ -749,7 +787,6 @@ static noinline int cow_file_range(struct inode *inode,
				     EXTENT_CLEAR_UNLOCK_PAGE |
				     EXTENT_CLEAR_UNLOCK |
				     EXTENT_CLEAR_DELALLOC |
-				     EXTENT_CLEAR_ACCOUNTING |
				     EXTENT_CLEAR_DIRTY |
				     EXTENT_SET_WRITEBACK |
				     EXTENT_END_WRITEBACK);
@@ -765,35 +802,13 @@ static noinline int cow_file_range(struct inode *inode,
	BUG_ON(disk_num_bytes >
	       btrfs_super_total_bytes(&root->fs_info->super_copy));

-
-	read_lock(&BTRFS_I(inode)->extent_tree.lock);
-	em = search_extent_mapping(&BTRFS_I(inode)->extent_tree,
-				   start, num_bytes);
-	if (em) {
-		/*
-		 * if block start isn't an actual block number then find the
-		 * first block in this inode and use that as a hint. If that
-		 * block is also bogus then just don't worry about it.
-		 */
-		if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
-			free_extent_map(em);
-			em = search_extent_mapping(em_tree, 0, 0);
-			if (em && em->block_start < EXTENT_MAP_LAST_BYTE)
-				alloc_hint = em->block_start;
-			if (em)
-				free_extent_map(em);
-		} else {
-			alloc_hint = em->block_start;
-			free_extent_map(em);
-		}
-	}
-	read_unlock(&BTRFS_I(inode)->extent_tree.lock);
+	alloc_hint = get_extent_allocation_hint(inode, start, num_bytes);
	btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0);

	while (disk_num_bytes > 0) {
		unsigned long op;

-		cur_alloc_size = min(disk_num_bytes, root->fs_info->max_extent);
+		cur_alloc_size = disk_num_bytes;
		ret = btrfs_reserve_extent(trans, root, cur_alloc_size,
					   root->sectorsize, 0, alloc_hint,
					   (u64)-1, &ins, 1);
@@ -1170,6 +1185,13 @@ out_check:
					       num_bytes, num_bytes, type);
		BUG_ON(ret);

+		if (root->root_key.objectid ==
+		    BTRFS_DATA_RELOC_TREE_OBJECTID) {
+			ret = btrfs_reloc_clone_csums(inode, cur_offset,
+						      num_bytes);
+			BUG_ON(ret);
+		}
+
		extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
				cur_offset, cur_offset + num_bytes - 1,
				locked_page, EXTENT_CLEAR_UNLOCK_PAGE |
@@ -1211,7 +1233,8 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page,
	else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC)
		ret = run_delalloc_nocow(inode, locked_page, start, end,
					 page_started, 0, nr_written);
-	else if (!btrfs_test_opt(root, COMPRESS))
+	else if (!btrfs_test_opt(root, COMPRESS) &&
+		 !(BTRFS_I(inode)->force_compress))
		ret = cow_file_range(inode, locked_page, start, end,
				      page_started, nr_written, 1);
	else
@@ -1221,36 +1244,13 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page,
 }

 static int btrfs_split_extent_hook(struct inode *inode,
				   struct extent_state *orig, u64 split)
 {
-	struct btrfs_root *root = BTRFS_I(inode)->root;
-	u64 size;
-
+	/* not delalloc, ignore it */
	if (!(orig->state & EXTENT_DELALLOC))
		return 0;

-	size = orig->end - orig->start + 1;
-	if (size > root->fs_info->max_extent) {
-		u64 num_extents;
-		u64 new_size;
-
-		new_size = orig->end - split + 1;
-		num_extents = div64_u64(size + root->fs_info->max_extent - 1,
-					root->fs_info->max_extent);
-
-		/*
-		 * if we break a large extent up then leave oustanding_extents
-		 * be, since we've already accounted for the large extent.
-		 */
-		if (div64_u64(new_size + root->fs_info->max_extent - 1,
-			      root->fs_info->max_extent) < num_extents)
-			return 0;
-	}
-
-	spin_lock(&BTRFS_I(inode)->accounting_lock);
-	BTRFS_I(inode)->outstanding_extents++;
-	spin_unlock(&BTRFS_I(inode)->accounting_lock);
-
+	atomic_inc(&BTRFS_I(inode)->outstanding_extents);
	return 0;
 }

@@ -1264,42 +1264,11 @@ static int btrfs_merge_extent_hook(struct inode *inode,
				   struct extent_state *new,
				   struct extent_state *other)
 {
-	struct btrfs_root *root = BTRFS_I(inode)->root;
-	u64 new_size, old_size;
-	u64 num_extents;
-
	/* not delalloc, ignore it */
	if (!(other->state & EXTENT_DELALLOC))
		return 0;

-	old_size = other->end - other->start + 1;
-	if (new->start < other->start)
-		new_size = other->end - new->start + 1;
-	else
-		new_size = new->end - other->start + 1;
-
-	/* we're not bigger than the max, unreserve the space and go */
-	if (new_size <= root->fs_info->max_extent) {
-		spin_lock(&BTRFS_I(inode)->accounting_lock);
-		BTRFS_I(inode)->outstanding_extents--;
-		spin_unlock(&BTRFS_I(inode)->accounting_lock);
-		return 0;
-	}
-
-	/*
-	 * If we grew by another max_extent, just return, we want to keep that
-	 * reserved amount.
-	 */
-	num_extents = div64_u64(old_size + root->fs_info->max_extent - 1,
-				root->fs_info->max_extent);
-	if (div64_u64(new_size + root->fs_info->max_extent - 1,
-		      root->fs_info->max_extent) > num_extents)
-		return 0;
-
-	spin_lock(&BTRFS_I(inode)->accounting_lock);
-	BTRFS_I(inode)->outstanding_extents--;
-	spin_unlock(&BTRFS_I(inode)->accounting_lock);
-
+	atomic_dec(&BTRFS_I(inode)->outstanding_extents);
	return 0;
 }

@@ -1308,8 +1277,8 @@ static int btrfs_merge_extent_hook(struct inode *inode,
  * bytes in this file, and to maintain the list of inodes that
  * have pending delalloc work to be done.
  */
-static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end,
-			      unsigned long old, unsigned long bits)
+static int btrfs_set_bit_hook(struct inode *inode,
+			      struct extent_state *state, int *bits)
 {

	/*
@@ -1317,16 +1286,18 @@ static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end,
	 * but in this case, we are only testeing for the DELALLOC
	 * bit, which is only set or cleared with irqs on
	 */
-	if (!(old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) {
+	if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
		struct btrfs_root *root = BTRFS_I(inode)->root;
+		u64 len = state->end + 1 - state->start;
+
+		if (*bits & EXTENT_FIRST_DELALLOC)
+			*bits &= ~EXTENT_FIRST_DELALLOC;
+		else
+			atomic_inc(&BTRFS_I(inode)->outstanding_extents);

-		spin_lock(&BTRFS_I(inode)->accounting_lock);
-		BTRFS_I(inode)->outstanding_extents++;
-		spin_unlock(&BTRFS_I(inode)->accounting_lock);
-		btrfs_delalloc_reserve_space(root, inode, end - start + 1);
		spin_lock(&root->fs_info->delalloc_lock);
-		BTRFS_I(inode)->delalloc_bytes += end - start + 1;
-		root->fs_info->delalloc_bytes += end - start + 1;
+		BTRFS_I(inode)->delalloc_bytes += len;
+		root->fs_info->delalloc_bytes += len;
		if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
			list_add_tail(&BTRFS_I(inode)->delalloc_inodes,
				      &root->fs_info->delalloc_inodes);
@@ -1340,44 +1311,32 @@ static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end,
  * extent_io.c clear_bit_hook, see set_bit_hook for why
  */
 static int btrfs_clear_bit_hook(struct inode *inode,
-				struct extent_state *state, unsigned long bits)
+				struct extent_state *state, int *bits)
 {
	/*
	 * set_bit and clear bit hooks normally require _irqsave/restore
	 * but in this case, we are only testeing for the DELALLOC
	 * bit, which is only set or cleared with irqs on
	 */
-	if ((state->state & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) {
+	if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
		struct btrfs_root *root = BTRFS_I(inode)->root;
+		u64 len = state->end + 1 - state->start;

-		if (bits & EXTENT_DO_ACCOUNTING) {
-			spin_lock(&BTRFS_I(inode)->accounting_lock);
-			BTRFS_I(inode)->outstanding_extents--;
-			spin_unlock(&BTRFS_I(inode)->accounting_lock);
-			btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
-		}
+		if (*bits & EXTENT_FIRST_DELALLOC)
+			*bits &= ~EXTENT_FIRST_DELALLOC;
+		else if (!(*bits & EXTENT_DO_ACCOUNTING))
+			atomic_dec(&BTRFS_I(inode)->outstanding_extents);
+
+		if (*bits & EXTENT_DO_ACCOUNTING)
+			btrfs_delalloc_release_metadata(inode, len);
+
+		if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID)
+			btrfs_free_reserved_data_space(inode, len);

		spin_lock(&root->fs_info->delalloc_lock);
-		if (state->end - state->start + 1 >
-		    root->fs_info->delalloc_bytes) {
-			printk(KERN_INFO "btrfs warning: delalloc account "
-			       "%llu %llu\n",
-			       (unsigned long long)
-			       state->end - state->start + 1,
-			       (unsigned long long)
-			       root->fs_info->delalloc_bytes);
-			btrfs_delalloc_free_space(root, inode, (u64)-1);
-			root->fs_info->delalloc_bytes = 0;
-			BTRFS_I(inode)->delalloc_bytes = 0;
-		} else {
-			btrfs_delalloc_free_space(root, inode,
-						  state->end -
-						  state->start + 1);
-			root->fs_info->delalloc_bytes -= state->end -
-						state->start + 1;
-			BTRFS_I(inode)->delalloc_bytes -= state->end -
-						state->start + 1;
-		}
+		root->fs_info->delalloc_bytes -= len;
+		BTRFS_I(inode)->delalloc_bytes -= len;
+
		if (BTRFS_I(inode)->delalloc_bytes == 0 &&
		    !list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
			list_del_init(&BTRFS_I(inode)->delalloc_inodes);
@@ -1426,7 +1385,8 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
  */
 static int __btrfs_submit_bio_start(struct inode *inode, int rw,
				    struct bio *bio, int mirror_num,
-				    unsigned long bio_flags)
+				    unsigned long bio_flags,
+				    u64 bio_offset)
 {
	struct btrfs_root *root = BTRFS_I(inode)->root;
	int ret = 0;
@@ -1445,7 +1405,8 @@ static int __btrfs_submit_bio_start(struct inode *inode, int rw,
  * are inserted into the btree
  */
 static int __btrfs_submit_bio_done(struct inode *inode, int rw, struct bio *bio,
-			  int mirror_num, unsigned long bio_flags)
+			  int mirror_num, unsigned long bio_flags,
+			  u64 bio_offset)
 {
	struct btrfs_root *root = BTRFS_I(inode)->root;
	return btrfs_map_bio(root, rw, bio, mirror_num, 1);
@@ -1456,7 +1417,8 @@ static int __btrfs_submit_bio_done(struct inode *inode, int rw, struct bio *bio,
  * on write, or reading the csums from the tree before a read
  */
 static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
-			  int mirror_num, unsigned long bio_flags)
+			  int mirror_num, unsigned long bio_flags,
+			  u64 bio_offset)
 {
	struct btrfs_root *root = BTRFS_I(inode)->root;
	int ret = 0;
@@ -1467,7 +1429,7 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
	ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
	BUG_ON(ret);

-	if (!(rw & (1 << BIO_RW))) {
+	if (!(rw & REQ_WRITE)) {
		if (bio_flags & EXTENT_BIO_COMPRESSED) {
			return btrfs_submit_compressed_read(inode, bio,
						    mirror_num, bio_flags);
@@ -1481,7 +1443,8 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
		/* we're doing a write, do the async checksumming */
		return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
				   inode, rw, bio, mirror_num,
-				   bio_flags, __btrfs_submit_bio_start,
+				   bio_flags, bio_offset,
+				   __btrfs_submit_bio_start,
				   __btrfs_submit_bio_done);
	}

@@ -1508,12 +1471,13 @@ static noinline int add_pending_csums(struct btrfs_trans_handle *trans,
	return 0;
 }

-int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end)
+int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
+			      struct extent_state **cached_state)
 {
	if ((end & (PAGE_CACHE_SIZE - 1)) == 0)
		WARN_ON(1);
	return set_extent_delalloc(&BTRFS_I(inode)->io_tree, start, end,
-				   GFP_NOFS);
+				   cached_state, GFP_NOFS);
 }

 /* see btrfs_writepage_start_hook for details on why this is required */
@@ -1526,6 +1490,7 @@ static void btrfs_writepage_fixup_worker(struct btrfs_work *work)
 {
	struct btrfs_writepage_fixup *fixup;
	struct btrfs_ordered_extent *ordered;
+	struct extent_state *cached_state = NULL;
	struct page *page;
	struct inode *inode;
	u64 page_start;
@@ -1544,7 +1509,8 @@ again:
	page_start = page_offset(page);
	page_end = page_offset(page) + PAGE_CACHE_SIZE - 1;

-	lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS);
+	lock_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end, 0,
+			 &cached_state, GFP_NOFS);

	/* already ordered? We're done */
	if (PagePrivate2(page))
@@ -1552,17 +1518,19 @@ again:

	ordered = btrfs_lookup_ordered_extent(inode, page_start);
	if (ordered) {
-		unlock_extent(&BTRFS_I(inode)->io_tree, page_start,
-			      page_end, GFP_NOFS);
+		unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start,
+				     page_end, &cached_state, GFP_NOFS);
		unlock_page(page);
		btrfs_start_ordered_extent(inode, ordered, 1);
		goto again;
	}

-	btrfs_set_extent_delalloc(inode, page_start, page_end);
+	BUG();
+	btrfs_set_extent_delalloc(inode, page_start, page_end, &cached_state);
	ClearPageChecked(page);
 out:
-	unlock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS);
+	unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, page_end,
+			     &cached_state, GFP_NOFS);
 out_page:
	unlock_page(page);
	page_cache_release(page);
@@ -1681,24 +1649,6 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
  * before we start the transaction. It limits the amount of btree
  * reads required while inside the transaction.
  */
-static noinline void reada_csum(struct btrfs_root *root,
-				struct btrfs_path *path,
-				struct btrfs_ordered_extent *ordered_extent)
-{
-	struct btrfs_ordered_sum *sum;
-	u64 bytenr;
-
-	sum = list_entry(ordered_extent->list.next, struct btrfs_ordered_sum,
-			 list);
-	bytenr = sum->sums[0].bytenr;
-
-	/*
-	 * we don't care about the results, the point of this search is
-	 * just to get the btree leaves into ram
-	 */
-	btrfs_lookup_csum(NULL, root->fs_info->csum_root, path, bytenr, 0);
-}
-
 /* as ordered data IO finishes, this gets called so we can finish
  * an ordered extent if the range of bytes in the file it covers are
  * fully written.
@@ -1706,60 +1656,39 @@ static noinline void reada_csum(struct btrfs_root *root,
 static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
 {
	struct btrfs_root *root = BTRFS_I(inode)->root;
-	struct btrfs_trans_handle *trans;
+	struct btrfs_trans_handle *trans = NULL;
	struct btrfs_ordered_extent *ordered_extent = NULL;
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
-	struct btrfs_path *path;
+	struct extent_state *cached_state = NULL;
	int compressed = 0;
	int ret;

-	ret = btrfs_dec_test_ordered_pending(inode, start, end - start + 1);
+	ret = btrfs_dec_test_ordered_pending(inode, &ordered_extent, start,
+					     end - start + 1);
	if (!ret)
		return 0;
-
-	/*
-	 * before we join the transaction, try to do some of our IO.
-	 * This will limit the amount of IO that we have to do with
-	 * the transaction running. We're unlikely to need to do any
-	 * IO if the file extents are new, the disk_i_size checks
-	 * covers the most common case.
-	 */
-	if (start < BTRFS_I(inode)->disk_i_size) {
-		path = btrfs_alloc_path();
-		if (path) {
-			ret = btrfs_lookup_file_extent(NULL, root, path,
-						       inode->i_ino,
-						       start, 0);
-			ordered_extent = btrfs_lookup_ordered_extent(inode,
-								     start);
-			if (!list_empty(&ordered_extent->list)) {
-				btrfs_release_path(root, path);
-				reada_csum(root, path, ordered_extent);
-			}
-			btrfs_free_path(path);
-		}
-	}
-
-	if (!ordered_extent)
-		ordered_extent = btrfs_lookup_ordered_extent(inode, start);
	BUG_ON(!ordered_extent);
+
	if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
		BUG_ON(!list_empty(&ordered_extent->list));
		ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
		if (!ret) {
			trans = btrfs_join_transaction(root, 1);
+			btrfs_set_trans_block_group(trans, inode);
+			trans->block_rsv = &root->fs_info->delalloc_block_rsv;
			ret = btrfs_update_inode(trans, root, inode);
			BUG_ON(ret);
-			btrfs_end_transaction(trans, root);
		}
		goto out;
	}

-	lock_extent(io_tree, ordered_extent->file_offset,
+	lock_extent_bits(io_tree, ordered_extent->file_offset,
		    ordered_extent->file_offset + ordered_extent->len - 1,
-		    GFP_NOFS);
+		    0, &cached_state, GFP_NOFS);

	trans = btrfs_join_transaction(root, 1);
+	btrfs_set_trans_block_group(trans, inode);
+	trans->block_rsv = &root->fs_info->delalloc_block_rsv;

	if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags))
		compressed = 1;
@@ -1784,18 +1713,20 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
						ordered_extent->len);
		BUG_ON(ret);
	}
-	unlock_extent(io_tree, ordered_extent->file_offset,
-		      ordered_extent->file_offset + ordered_extent->len - 1,
-		      GFP_NOFS);
+	unlock_extent_cached(io_tree, ordered_extent->file_offset,
+			     ordered_extent->file_offset +
+			     ordered_extent->len - 1, &cached_state, GFP_NOFS);
+
	add_pending_csums(trans, inode, ordered_extent->file_offset,
			  &ordered_extent->list);

-	/* this also removes the ordered extent from the tree */
	btrfs_ordered_update_i_size(inode, 0, ordered_extent);
	ret = btrfs_update_inode(trans, root, inode);
	BUG_ON(ret);
-	btrfs_end_transaction(trans, root);
 out:
+	btrfs_delalloc_release_metadata(inode, ordered_extent->len);
+	if (trans)
+		btrfs_end_transaction(trans, root);
	/* once for us */
	btrfs_put_ordered_extent(ordered_extent);
	/* once for the tree */
@@ -1910,14 +1841,14 @@ static int btrfs_io_failed_hook(struct bio *failed_bio,
	bio->bi_size = 0;

	bio_add_page(bio, page, failrec->len, start - page_offset(page));
-	if (failed_bio->bi_rw & (1 << BIO_RW))
+	if (failed_bio->bi_rw & REQ_WRITE)
		rw = WRITE;
	else
		rw = READ;

	BTRFS_I(inode)->io_tree.ops->submit_bio_hook(inode, rw, bio,
						failrec->last_mirror,
-						failrec->bio_flags);
+						failrec->bio_flags, 0);
	return 0;
 }

@@ -2072,32 +2003,196 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root)
 }

 /*
+ * calculate extra metadata reservation when snapshotting a subvolume
+ * contains orphan files.
+ */
+void btrfs_orphan_pre_snapshot(struct btrfs_trans_handle *trans,
+				struct btrfs_pending_snapshot *pending,
+				u64 *bytes_to_reserve)
+{
+	struct btrfs_root *root;
+	struct btrfs_block_rsv *block_rsv;
+	u64 num_bytes;
+	int index;
+
+	root = pending->root;
+	if (!root->orphan_block_rsv || list_empty(&root->orphan_list))
+		return;
+
+	block_rsv = root->orphan_block_rsv;
+
+	/* orphan block reservation for the snapshot */
+	num_bytes = block_rsv->size;
+
+	/*
+	 * after the snapshot is created, COWing tree blocks may use more
+	 * space than it frees. So we should make sure there is enough
+	 * reserved space.
+	 */
+	index = trans->transid & 0x1;
+	if (block_rsv->reserved + block_rsv->freed[index] < block_rsv->size) {
+		num_bytes += block_rsv->size -
+			     (block_rsv->reserved + block_rsv->freed[index]);
+	}
+
+	*bytes_to_reserve += num_bytes;
+}
+
+void btrfs_orphan_post_snapshot(struct btrfs_trans_handle *trans,
+				struct btrfs_pending_snapshot *pending)
+{
+	struct btrfs_root *root = pending->root;
+	struct btrfs_root *snap = pending->snap;
+	struct btrfs_block_rsv *block_rsv;
+	u64 num_bytes;
+	int index;
+	int ret;
+
+	if (!root->orphan_block_rsv || list_empty(&root->orphan_list))
+		return;
+
+	/* refill source subvolume's orphan block reservation */
+	block_rsv = root->orphan_block_rsv;
+	index = trans->transid & 0x1;
+	if (block_rsv->reserved + block_rsv->freed[index] < block_rsv->size) {
+		num_bytes = block_rsv->size -
+			    (block_rsv->reserved + block_rsv->freed[index]);
+		ret = btrfs_block_rsv_migrate(&pending->block_rsv,
+					      root->orphan_block_rsv,
+					      num_bytes);
+		BUG_ON(ret);
+	}
+
+	/* setup orphan block reservation for the snapshot */
+	block_rsv = btrfs_alloc_block_rsv(snap);
+	BUG_ON(!block_rsv);
+
+	btrfs_add_durable_block_rsv(root->fs_info, block_rsv);
+	snap->orphan_block_rsv = block_rsv;
+
+	num_bytes = root->orphan_block_rsv->size;
+	ret = btrfs_block_rsv_migrate(&pending->block_rsv,
+				      block_rsv, num_bytes);
+	BUG_ON(ret);
+
+#if 0
+	/* insert orphan item for the snapshot */
+	WARN_ON(!root->orphan_item_inserted);
+	ret = btrfs_insert_orphan_item(trans, root->fs_info->tree_root,
+				       snap->root_key.objectid);
+	BUG_ON(ret);
+	snap->orphan_item_inserted = 1;
+#endif
+}
+
+enum btrfs_orphan_cleanup_state {
+	ORPHAN_CLEANUP_STARTED = 1,
+	ORPHAN_CLEANUP_DONE = 2,
+};
+
+/*
+ * This is called in transaction commmit time. If there are no orphan
+ * files in the subvolume, it removes orphan item and frees block_rsv
+ * structure.
+ */
+void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans,
+			      struct btrfs_root *root)
+{
+	int ret;
+
+	if (!list_empty(&root->orphan_list) ||
+	    root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE)
+		return;
+
+	if (root->orphan_item_inserted &&
+	    btrfs_root_refs(&root->root_item) > 0) {
+		ret = btrfs_del_orphan_item(trans, root->fs_info->tree_root,
+					    root->root_key.objectid);
+		BUG_ON(ret);
+		root->orphan_item_inserted = 0;
+	}
+
+	if (root->orphan_block_rsv) {
+		WARN_ON(root->orphan_block_rsv->size > 0);
+		btrfs_free_block_rsv(root, root->orphan_block_rsv);
+		root->orphan_block_rsv = NULL;
+	}
+}
+
+/*
  * This creates an orphan entry for the given inode in case something goes
  * wrong in the middle of an unlink/truncate.
+ *
+ * NOTE: caller of this function should reserve 5 units of metadata for
+ *	 this function.
  */
 int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
 {
	struct btrfs_root *root = BTRFS_I(inode)->root;
-	int ret = 0;
+	struct btrfs_block_rsv *block_rsv = NULL;
+	int reserve = 0;
+	int insert = 0;
+	int ret;

-	spin_lock(&root->list_lock);
+	if (!root->orphan_block_rsv) {
+		block_rsv = btrfs_alloc_block_rsv(root);
+		BUG_ON(!block_rsv);
+	}

-	/* already on the orphan list, we're good */
-	if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
-		spin_unlock(&root->list_lock);
-		return 0;
+	spin_lock(&root->orphan_lock);
+	if (!root->orphan_block_rsv) {
+		root->orphan_block_rsv = block_rsv;
+	} else if (block_rsv) {
+		btrfs_free_block_rsv(root, block_rsv);
+		block_rsv = NULL;
	}

-	list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list);
+	if (list_empty(&BTRFS_I(inode)->i_orphan)) {
+		list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list);
+#if 0
+		/*
+		 * For proper ENOSPC handling, we should do orphan
+		 * cleanup when mounting. But this introduces backward
+		 * compatibility issue.
+		 */
+		if (!xchg(&root->orphan_item_inserted, 1))
+			insert = 2;
+		else
+			insert = 1;
+#endif
+		insert = 1;
+	} else {
+		WARN_ON(!BTRFS_I(inode)->orphan_meta_reserved);
+	}

-	spin_unlock(&root->list_lock);
+	if (!BTRFS_I(inode)->orphan_meta_reserved) {
+		BTRFS_I(inode)->orphan_meta_reserved = 1;
+		reserve = 1;
+	}
+	spin_unlock(&root->orphan_lock);

-	/*
-	 * insert an orphan item to track this unlinked/truncated file
-	 */
-	ret = btrfs_insert_orphan_item(trans, root, inode->i_ino);
+	if (block_rsv)
+		btrfs_add_durable_block_rsv(root->fs_info, block_rsv);

-	return ret;
+	/* grab metadata reservation from transaction handle */
+	if (reserve) {
+		ret = btrfs_orphan_reserve_metadata(trans, inode);
+		BUG_ON(ret);
+	}
+
+	/* insert an orphan item to track this unlinked/truncated file */
+	if (insert >= 1) {
+		ret = btrfs_insert_orphan_item(trans, root, inode->i_ino);
+		BUG_ON(ret);
+	}
+
+	/* insert an orphan item to track subvolume contains orphan files */
+	if (insert >= 2) {
+		ret = btrfs_insert_orphan_item(trans, root->fs_info->tree_root,
+					       root->root_key.objectid);
+		BUG_ON(ret);
+	}
+	return 0;
 }

 /*
@@ -2107,26 +2202,31 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
 int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode)
 {
	struct btrfs_root *root = BTRFS_I(inode)->root;
+	int delete_item = 0;
+	int release_rsv = 0;
	int ret = 0;

-	spin_lock(&root->list_lock);
-
-	if (list_empty(&BTRFS_I(inode)->i_orphan)) {
-		spin_unlock(&root->list_lock);
-		return 0;
+	spin_lock(&root->orphan_lock);
+	if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
+		list_del_init(&BTRFS_I(inode)->i_orphan);
+		delete_item = 1;
	}

-	list_del_init(&BTRFS_I(inode)->i_orphan);
-	if (!trans) {
-		spin_unlock(&root->list_lock);
-		return 0;
+	if (BTRFS_I(inode)->orphan_meta_reserved) {
+		BTRFS_I(inode)->orphan_meta_reserved = 0;
+		release_rsv = 1;
	}
+	spin_unlock(&root->orphan_lock);

-	spin_unlock(&root->list_lock);
+	if (trans && delete_item) {
+		ret = btrfs_del_orphan_item(trans, root, inode->i_ino);
+		BUG_ON(ret);
+	}

-	ret = btrfs_del_orphan_item(trans, root, inode->i_ino);
+	if (release_rsv)
+		btrfs_orphan_release_metadata(inode);

-	return ret;
+	return 0;
 }

 /*
@@ -2143,7 +2243,7 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
	struct inode *inode;
	int ret = 0, nr_unlink = 0, nr_truncate = 0;

-	if (!xchg(&root->clean_orphans, 0))
+	if (cmpxchg(&root->orphan_cleanup_state, 0, ORPHAN_CLEANUP_STARTED))
		return;

	path = btrfs_alloc_path();
@@ -2195,17 +2295,16 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
		found_key.objectid = found_key.offset;
		found_key.type = BTRFS_INODE_ITEM_KEY;
		found_key.offset = 0;
-		inode = btrfs_iget(root->fs_info->sb, &found_key, root);
-		if (IS_ERR(inode))
-			break;
+		inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL);
+		BUG_ON(IS_ERR(inode));

		/*
		 * add this inode to the orphan list so btrfs_orphan_del does
		 * the proper thing when we hit it
		 */
-		spin_lock(&root->list_lock);
+		spin_lock(&root->orphan_lock);
		list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list);
-		spin_unlock(&root->list_lock);
+		spin_unlock(&root->orphan_lock);

		/*
		 * if this is a bad inode, means we actually succeeded in
@@ -2214,7 +2313,7 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
		 * do a destroy_inode
		 */
		if (is_bad_inode(inode)) {
-			trans = btrfs_start_transaction(root, 1);
+			trans = btrfs_start_transaction(root, 0);
			btrfs_orphan_del(trans, inode);
			btrfs_end_transaction(trans, root);
			iput(inode);
@@ -2232,13 +2331,23 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
		/* this will do delete_inode and everything for us */
		iput(inode);
	}
+	btrfs_free_path(path);
+
+	root->orphan_cleanup_state = ORPHAN_CLEANUP_DONE;
+
+	if (root->orphan_block_rsv)
+		btrfs_block_rsv_release(root, root->orphan_block_rsv,
+					(u64)-1);
+
+	if (root->orphan_block_rsv || root->orphan_item_inserted) {
+		trans = btrfs_join_transaction(root, 1);
+		btrfs_end_transaction(trans, root);
+	}

	if (nr_unlink)
		printk(KERN_INFO "btrfs: unlinked %d orphans\n", nr_unlink);
	if (nr_truncate)
		printk(KERN_INFO "btrfs: truncated %d orphans\n", nr_truncate);
-
-	btrfs_free_path(path);
 }

 /*
@@ -2557,29 +2666,201 @@ out:
	return ret;
 }

-static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
+/* helper to check if there is any shared block in the path */
+static int check_path_shared(struct btrfs_root *root,
+			     struct btrfs_path *path)
+{
+	struct extent_buffer *eb;
+	int level;
+	int ret;
+	u64 refs = 1;
+
+	for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
+		if (!path->nodes[level])
+			break;
+		eb = path->nodes[level];
+		if (!btrfs_block_can_be_shared(root, eb))
+			continue;
+		ret = btrfs_lookup_extent_info(NULL, root, eb->start, eb->len,
+					       &refs, NULL);
+		if (refs > 1)
+			return 1;
+	}
+	return 0;
+}
+
+/*
+ * helper to start transaction for unlink and rmdir.
+ *
+ * unlink and rmdir are special in btrfs, they do not always free space.
+ * so in enospc case, we should make sure they will free space before
+ * allowing them to use the global metadata reservation.
+ */
+static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
+						       struct dentry *dentry)
 {
-	struct btrfs_root *root;
	struct btrfs_trans_handle *trans;
+	struct btrfs_root *root = BTRFS_I(dir)->root;
+	struct btrfs_path *path;
+	struct btrfs_inode_ref *ref;
+	struct btrfs_dir_item *di;
	struct inode *inode = dentry->d_inode;
+	u64 index;
+	int check_link = 1;
+	int err = -ENOSPC;
	int ret;
-	unsigned long nr = 0;

-	root = BTRFS_I(dir)->root;
+	trans = btrfs_start_transaction(root, 10);
+	if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC)
+		return trans;

-	/*
-	 * 5 items for unlink inode
-	 * 1 for orphan
-	 */
-	ret = btrfs_reserve_metadata_space(root, 6);
-	if (ret)
-		return ret;
+	if (inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
+		return ERR_PTR(-ENOSPC);
+
+	/* check if there is someone else holds reference */
+	if (S_ISDIR(inode->i_mode) && atomic_read(&inode->i_count) > 1)
+		return ERR_PTR(-ENOSPC);
+
+	if (atomic_read(&inode->i_count) > 2)
+		return ERR_PTR(-ENOSPC);

-	trans = btrfs_start_transaction(root, 1);
+	if (xchg(&root->fs_info->enospc_unlink, 1))
+		return ERR_PTR(-ENOSPC);
+
+	path = btrfs_alloc_path();
+	if (!path) {
+		root->fs_info->enospc_unlink = 0;
+		return ERR_PTR(-ENOMEM);
+	}
+
+	trans = btrfs_start_transaction(root, 0);
	if (IS_ERR(trans)) {
-		btrfs_unreserve_metadata_space(root, 6);
-		return PTR_ERR(trans);
+		btrfs_free_path(path);
+		root->fs_info->enospc_unlink = 0;
+		return trans;
+	}
+
+	path->skip_locking = 1;
+	path->search_commit_root = 1;
+
+	ret = btrfs_lookup_inode(trans, root, path,
+				&BTRFS_I(dir)->location, 0);
+	if (ret < 0) {
+		err = ret;
+		goto out;
+	}
+	if (ret == 0) {
+		if (check_path_shared(root, path))
+			goto out;
+	} else {
+		check_link = 0;
+	}
+	btrfs_release_path(root, path);
+
+	ret = btrfs_lookup_inode(trans, root, path,
+				&BTRFS_I(inode)->location, 0);
+	if (ret < 0) {
+		err = ret;
+		goto out;
+	}
+	if (ret == 0) {
+		if (check_path_shared(root, path))
+			goto out;
+	} else {
+		check_link = 0;
+	}
+	btrfs_release_path(root, path);
+
+	if (ret == 0 && S_ISREG(inode->i_mode)) {
+		ret = btrfs_lookup_file_extent(trans, root, path,
+					       inode->i_ino, (u64)-1, 0);
+		if (ret < 0) {
+			err = ret;
+			goto out;
+		}
+		BUG_ON(ret == 0);
+		if (check_path_shared(root, path))
+			goto out;
+		btrfs_release_path(root, path);
+	}
+
+	if (!check_link) {
+		err = 0;
+		goto out;
+	}
+
+	di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino,
+				dentry->d_name.name, dentry->d_name.len, 0);
+	if (IS_ERR(di)) {
+		err = PTR_ERR(di);
+		goto out;
+	}
+	if (di) {
+		if (check_path_shared(root, path))
+			goto out;
+	} else {
+		err = 0;
+		goto out;
+	}
+	btrfs_release_path(root, path);
+
+	ref = btrfs_lookup_inode_ref(trans, root, path,
+				dentry->d_name.name, dentry->d_name.len,
+				inode->i_ino, dir->i_ino, 0);
+	if (IS_ERR(ref)) {
+		err = PTR_ERR(ref);
+		goto out;
+	}
+	BUG_ON(!ref);
+	if (check_path_shared(root, path))
+		goto out;
+	index = btrfs_inode_ref_index(path->nodes[0], ref);
+	btrfs_release_path(root, path);
+
+	di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, index,
+				dentry->d_name.name, dentry->d_name.len, 0);
+	if (IS_ERR(di)) {
+		err = PTR_ERR(di);
+		goto out;
	}
+	BUG_ON(ret == -ENOENT);
+	if (check_path_shared(root, path))
+		goto out;
+
+	err = 0;
+out:
+	btrfs_free_path(path);
+	if (err) {
+		btrfs_end_transaction(trans, root);
+		root->fs_info->enospc_unlink = 0;
+		return ERR_PTR(err);
+	}
+
+	trans->block_rsv = &root->fs_info->global_block_rsv;
+	return trans;
+}
+
+static void __unlink_end_trans(struct btrfs_trans_handle *trans,
+			       struct btrfs_root *root)
+{
+	if (trans->block_rsv == &root->fs_info->global_block_rsv) {
+		BUG_ON(!root->fs_info->enospc_unlink);
+		root->fs_info->enospc_unlink = 0;
+	}
+	btrfs_end_transaction_throttle(trans, root);
+}
+
+static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
+{
+	struct btrfs_root *root = BTRFS_I(dir)->root;
+	struct btrfs_trans_handle *trans;
+	struct inode *inode = dentry->d_inode;
+	int ret;
+	unsigned long nr = 0;
+
+	trans = __unlink_start_trans(dir, dentry);
+	if (IS_ERR(trans))
+		return PTR_ERR(trans);

	btrfs_set_trans_block_group(trans, dir);

@@ -2587,14 +2868,15 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)

	ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode,
				 dentry->d_name.name, dentry->d_name.len);
+	BUG_ON(ret);

-	if (inode->i_nlink == 0)
+	if (inode->i_nlink == 0) {
		ret = btrfs_orphan_add(trans, inode);
+		BUG_ON(ret);
+	}

	nr = trans->blocks_used;
-
-	btrfs_end_transaction_throttle(trans, root);
-	btrfs_unreserve_metadata_space(root, 6);
+	__unlink_end_trans(trans, root);
	btrfs_btree_balance_dirty(root, nr);
	return ret;
 }
@@ -2656,7 +2938,6 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
	dir->i_mtime = dir->i_ctime = CURRENT_TIME;
	ret = btrfs_update_inode(trans, root, dir);
	BUG_ON(ret);
-	dir->i_sb->s_dirt = 1;

	btrfs_free_path(path);
	return 0;
@@ -2666,7 +2947,6 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
 {
	struct inode *inode = dentry->d_inode;
	int err = 0;
-	int ret;
	struct btrfs_root *root = BTRFS_I(dir)->root;
	struct btrfs_trans_handle *trans;
	unsigned long nr = 0;
@@ -2675,15 +2955,9 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
	    inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)
		return -ENOTEMPTY;

-	ret = btrfs_reserve_metadata_space(root, 5);
-	if (ret)
-		return ret;
-
-	trans = btrfs_start_transaction(root, 1);
-	if (IS_ERR(trans)) {
-		btrfs_unreserve_metadata_space(root, 5);
+	trans = __unlink_start_trans(dir, dentry);
+	if (IS_ERR(trans))
		return PTR_ERR(trans);
-	}

	btrfs_set_trans_block_group(trans, dir);

@@ -2706,12 +2980,9 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
		btrfs_i_size_write(inode, 0);
 out:
	nr = trans->blocks_used;
-	ret = btrfs_end_transaction_throttle(trans, root);
-	btrfs_unreserve_metadata_space(root, 5);
+	__unlink_end_trans(trans, root);
	btrfs_btree_balance_dirty(root, nr);

-	if (ret && !err)
-		err = ret;
	return err;
 }

@@ -3108,6 +3379,7 @@ out:
	if (pending_del_nr) {
		ret = btrfs_del_items(trans, root, path, pending_del_slot,
				      pending_del_nr);
+		BUG_ON(ret);
	}
	btrfs_free_path(path);
	return err;
@@ -3123,6 +3395,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	struct btrfs_ordered_extent *ordered;
+	struct extent_state *cached_state = NULL;
	char *kaddr;
	u32 blocksize = root->sectorsize;
	pgoff_t index = from >> PAGE_CACHE_SHIFT;
@@ -3134,11 +3407,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from)

	if ((offset & (blocksize - 1)) == 0)
		goto out;
-	ret = btrfs_check_data_free_space(root, inode, PAGE_CACHE_SIZE);
-	if (ret)
-		goto out;
-
-	ret = btrfs_reserve_metadata_for_delalloc(root, inode, 1);
+	ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
	if (ret)
		goto out;

@@ -3146,8 +3415,7 @@ again:
 again:
	page = grab_cache_page(mapping, index);
	if (!page) {
-		btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE);
-		btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
+		btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
		goto out;
	}

@@ -3169,12 +3437,14 @@ again:
	}
	wait_on_page_writeback(page);

-	lock_extent(io_tree, page_start, page_end, GFP_NOFS);
+	lock_extent_bits(io_tree, page_start, page_end, 0, &cached_state,
+			 GFP_NOFS);
	set_page_extent_mapped(page);

	ordered = btrfs_lookup_ordered_extent(inode, page_start);
	if (ordered) {
-		unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
+		unlock_extent_cached(io_tree, page_start, page_end,
+				     &cached_state, GFP_NOFS);
		unlock_page(page);
		page_cache_release(page);
		btrfs_start_ordered_extent(inode, ordered, 1);
@@ -3182,13 +3452,15 @@ again:
		goto again;
	}

-	clear_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end,
+	clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, page_end,
			  EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING,
-			  GFP_NOFS);
+			  0, 0, &cached_state, GFP_NOFS);

-	ret = btrfs_set_extent_delalloc(inode, page_start, page_end);
+	ret = btrfs_set_extent_delalloc(inode, page_start, page_end,
+					&cached_state);
	if (ret) {
-		unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
+		unlock_extent_cached(io_tree, page_start, page_end,
+				     &cached_state, GFP_NOFS);
		goto out_unlock;
	}

@@ -3201,12 +3473,12 @@ again:
	}
	ClearPageChecked(page);
	set_page_dirty(page);
-	unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
+	unlock_extent_cached(io_tree, page_start, page_end, &cached_state,
+			     GFP_NOFS);

 out_unlock:
	if (ret)
-		btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE);
-	btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
+		btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
	unlock_page(page);
	page_cache_release(page);
 out:
@@ -3218,7 +3490,8 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
	struct btrfs_trans_handle *trans;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
-	struct extent_map *em;
+	struct extent_map *em = NULL;
+	struct extent_state *cached_state = NULL;
	u64 mask = root->sectorsize - 1;
	u64 hole_start = (inode->i_size + mask) & ~mask;
	u64 block_end = (size + mask) & ~mask;
@@ -3234,11 +3507,13 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
		struct btrfs_ordered_extent *ordered;
		btrfs_wait_ordered_range(inode, hole_start,
					 block_end - hole_start);
-		lock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS);
+		lock_extent_bits(io_tree, hole_start, block_end - 1, 0,
+				 &cached_state, GFP_NOFS);
		ordered = btrfs_lookup_ordered_extent(inode, hole_start);
		if (!ordered)
			break;
-		unlock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS);
+		unlock_extent_cached(io_tree, hole_start, block_end - 1,
+				     &cached_state, GFP_NOFS);
		btrfs_put_ordered_extent(ordered);
	}

@@ -3253,11 +3528,11 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
3253 u64 hint_byte = 0; 3528 u64 hint_byte = 0;
3254 hole_size = last_byte - cur_offset; 3529 hole_size = last_byte - cur_offset;
3255 3530
3256 err = btrfs_reserve_metadata_space(root, 2); 3531 trans = btrfs_start_transaction(root, 2);
3257 if (err) 3532 if (IS_ERR(trans)) {
3533 err = PTR_ERR(trans);
3258 break; 3534 break;
3259 3535 }
3260 trans = btrfs_start_transaction(root, 1);
3261 btrfs_set_trans_block_group(trans, inode); 3536 btrfs_set_trans_block_group(trans, inode);
3262 3537
3263 err = btrfs_drop_extents(trans, inode, cur_offset, 3538 err = btrfs_drop_extents(trans, inode, cur_offset,
@@ -3275,15 +3550,17 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
3275 last_byte - 1, 0); 3550 last_byte - 1, 0);
3276 3551
3277 btrfs_end_transaction(trans, root); 3552 btrfs_end_transaction(trans, root);
3278 btrfs_unreserve_metadata_space(root, 2);
3279 } 3553 }
3280 free_extent_map(em); 3554 free_extent_map(em);
3555 em = NULL;
3281 cur_offset = last_byte; 3556 cur_offset = last_byte;
3282 if (cur_offset >= block_end) 3557 if (cur_offset >= block_end)
3283 break; 3558 break;
3284 } 3559 }
3285 3560
3286 unlock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS); 3561 free_extent_map(em);
3562 unlock_extent_cached(io_tree, hole_start, block_end - 1, &cached_state,
3563 GFP_NOFS);
3287 return err; 3564 return err;
3288} 3565}
3289 3566
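
btrfs_cont_expand rounds both the current i_size and the target size up to the next sector boundary with (x + mask) & ~mask, where mask = sectorsize - 1, and only fills the hole between those two aligned offsets. A small worked example of that rounding, assuming a 4096-byte sector size (a common default, not something this hunk states):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    uint64_t sectorsize = 4096;          /* assumed sector size */
    uint64_t mask = sectorsize - 1;
    uint64_t i_size = 10000;             /* current file size */
    uint64_t new_size = 30000;           /* size we are expanding to */

    uint64_t hole_start = (i_size + mask) & ~mask;   /* 12288 */
    uint64_t block_end  = (new_size + mask) & ~mask; /* 32768 */

    /* The hole that needs filling is [hole_start, block_end). */
    printf("hole_start=%llu block_end=%llu hole=%llu bytes\n",
           (unsigned long long)hole_start,
           (unsigned long long)block_end,
           (unsigned long long)(block_end - hole_start));
    return 0;
}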
@@ -3308,11 +3585,10 @@ static int btrfs_setattr_size(struct inode *inode, struct iattr *attr)
3308 } 3585 }
3309 } 3586 }
3310 3587
3311 ret = btrfs_reserve_metadata_space(root, 1); 3588 trans = btrfs_start_transaction(root, 5);
3312 if (ret) 3589 if (IS_ERR(trans))
3313 return ret; 3590 return PTR_ERR(trans);
3314 3591
3315 trans = btrfs_start_transaction(root, 1);
3316 btrfs_set_trans_block_group(trans, inode); 3592 btrfs_set_trans_block_group(trans, inode);
3317 3593
3318 ret = btrfs_orphan_add(trans, inode); 3594 ret = btrfs_orphan_add(trans, inode);
@@ -3320,7 +3596,6 @@ static int btrfs_setattr_size(struct inode *inode, struct iattr *attr)
3320 3596
3321 nr = trans->blocks_used; 3597 nr = trans->blocks_used;
3322 btrfs_end_transaction(trans, root); 3598 btrfs_end_transaction(trans, root);
3323 btrfs_unreserve_metadata_space(root, 1);
3324 btrfs_btree_balance_dirty(root, nr); 3599 btrfs_btree_balance_dirty(root, nr);
3325 3600
3326 if (attr->ia_size > inode->i_size) { 3601 if (attr->ia_size > inode->i_size) {
@@ -3333,8 +3608,11 @@ static int btrfs_setattr_size(struct inode *inode, struct iattr *attr)
3333 i_size_write(inode, attr->ia_size); 3608 i_size_write(inode, attr->ia_size);
3334 btrfs_ordered_update_i_size(inode, inode->i_size, NULL); 3609 btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
3335 3610
3336 trans = btrfs_start_transaction(root, 1); 3611 trans = btrfs_start_transaction(root, 0);
3612 BUG_ON(IS_ERR(trans));
3337 btrfs_set_trans_block_group(trans, inode); 3613 btrfs_set_trans_block_group(trans, inode);
3614 trans->block_rsv = root->orphan_block_rsv;
3615 BUG_ON(!trans->block_rsv);
3338 3616
3339 ret = btrfs_update_inode(trans, root, inode); 3617 ret = btrfs_update_inode(trans, root, inode);
3340 BUG_ON(ret); 3618 BUG_ON(ret);
@@ -3377,17 +3655,19 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
3377 if (err) 3655 if (err)
3378 return err; 3656 return err;
3379 } 3657 }
3380 attr->ia_valid &= ~ATTR_SIZE;
3381 3658
3382 if (attr->ia_valid) 3659 if (attr->ia_valid) {
3383 err = inode_setattr(inode, attr); 3660 setattr_copy(inode, attr);
3661 mark_inode_dirty(inode);
3662
3663 if (attr->ia_valid & ATTR_MODE)
3664 err = btrfs_acl_chmod(inode);
3665 }
3384 3666
3385 if (!err && ((attr->ia_valid & ATTR_MODE)))
3386 err = btrfs_acl_chmod(inode);
3387 return err; 3667 return err;
3388} 3668}
3389 3669
3390void btrfs_delete_inode(struct inode *inode) 3670void btrfs_evict_inode(struct inode *inode)
3391{ 3671{
3392 struct btrfs_trans_handle *trans; 3672 struct btrfs_trans_handle *trans;
3393 struct btrfs_root *root = BTRFS_I(inode)->root; 3673 struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -3395,10 +3675,14 @@ void btrfs_delete_inode(struct inode *inode)
3395 int ret; 3675 int ret;
3396 3676
3397 truncate_inode_pages(&inode->i_data, 0); 3677 truncate_inode_pages(&inode->i_data, 0);
3678 if (inode->i_nlink && btrfs_root_refs(&root->root_item) != 0)
3679 goto no_delete;
3680
3398 if (is_bad_inode(inode)) { 3681 if (is_bad_inode(inode)) {
3399 btrfs_orphan_del(NULL, inode); 3682 btrfs_orphan_del(NULL, inode);
3400 goto no_delete; 3683 goto no_delete;
3401 } 3684 }
3685 /* do we really want it for ->i_nlink > 0 and zero btrfs_root_refs? */
3402 btrfs_wait_ordered_range(inode, 0, (u64)-1); 3686 btrfs_wait_ordered_range(inode, 0, (u64)-1);
3403 3687
3404 if (root->fs_info->log_root_recovering) { 3688 if (root->fs_info->log_root_recovering) {
@@ -3414,10 +3698,21 @@ void btrfs_delete_inode(struct inode *inode)
3414 btrfs_i_size_write(inode, 0); 3698 btrfs_i_size_write(inode, 0);
3415 3699
3416 while (1) { 3700 while (1) {
3417 trans = btrfs_start_transaction(root, 1); 3701 trans = btrfs_start_transaction(root, 0);
3702 BUG_ON(IS_ERR(trans));
3418 btrfs_set_trans_block_group(trans, inode); 3703 btrfs_set_trans_block_group(trans, inode);
3419 ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0); 3704 trans->block_rsv = root->orphan_block_rsv;
3420 3705
3706 ret = btrfs_block_rsv_check(trans, root,
3707 root->orphan_block_rsv, 0, 5);
3708 if (ret) {
3709 BUG_ON(ret != -EAGAIN);
3710 ret = btrfs_commit_transaction(trans, root);
3711 BUG_ON(ret);
3712 continue;
3713 }
3714
3715 ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0);
3421 if (ret != -EAGAIN) 3716 if (ret != -EAGAIN)
3422 break; 3717 break;
3423 3718
@@ -3425,6 +3720,7 @@ void btrfs_delete_inode(struct inode *inode)
3425 btrfs_end_transaction(trans, root); 3720 btrfs_end_transaction(trans, root);
3426 trans = NULL; 3721 trans = NULL;
3427 btrfs_btree_balance_dirty(root, nr); 3722 btrfs_btree_balance_dirty(root, nr);
3723
3428 } 3724 }
3429 3725
3430 if (ret == 0) { 3726 if (ret == 0) {
@@ -3436,7 +3732,7 @@ void btrfs_delete_inode(struct inode *inode)
3436 btrfs_end_transaction(trans, root); 3732 btrfs_end_transaction(trans, root);
3437 btrfs_btree_balance_dirty(root, nr); 3733 btrfs_btree_balance_dirty(root, nr);
3438no_delete: 3734no_delete:
3439 clear_inode(inode); 3735 end_writeback(inode);
3440 return; 3736 return;
3441} 3737}
3442 3738
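
The new eviction loop above reserves from the orphan block reservation and, when btrfs_block_rsv_check reports -EAGAIN, commits the running transaction and retries instead of failing. A hedged sketch of that retry shape; reserve_space, commit_all and do_work are hypothetical stand-ins, not btrfs APIs.

#include <errno.h>
#include <stdio.h>

static long budget = 2;                 /* pretend metadata reservation */

static int reserve_space(long need)     /* hypothetical stand-in */
{
    if (budget >= need) { budget -= need; return 0; }
    return -EAGAIN;
}

static void commit_all(void)            /* hypothetical stand-in */
{
    budget += 5;                        /* committing frees up space */
    printf("committed, budget now %ld\n", budget);
}

static int do_work(void) { return 0; }  /* hypothetical stand-in */

int main(void)
{
    for (;;) {
        int ret = reserve_space(3);
        if (ret) {                      /* only -EAGAIN is expected here */
            if (ret != -EAGAIN) return 1;
            commit_all();               /* free space, then try again */
            continue;
        }
        if (do_work() != -EAGAIN)       /* loop until the work completes */
            break;
    }
    printf("done, budget %ld\n", budget);
    return 0;
}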
@@ -3567,7 +3863,7 @@ again:
3567 p = &parent->rb_right; 3863 p = &parent->rb_right;
3568 else { 3864 else {
3569 WARN_ON(!(entry->vfs_inode.i_state & 3865 WARN_ON(!(entry->vfs_inode.i_state &
3570 (I_WILL_FREE | I_FREEING | I_CLEAR))); 3866 (I_WILL_FREE | I_FREEING)));
3571 rb_erase(parent, &root->inode_tree); 3867 rb_erase(parent, &root->inode_tree);
3572 RB_CLEAR_NODE(parent); 3868 RB_CLEAR_NODE(parent);
3573 spin_unlock(&root->inode_lock); 3869 spin_unlock(&root->inode_lock);
@@ -3646,7 +3942,7 @@ again:
3646 if (atomic_read(&inode->i_count) > 1) 3942 if (atomic_read(&inode->i_count) > 1)
3647 d_prune_aliases(inode); 3943 d_prune_aliases(inode);
3648 /* 3944 /*
3649 * btrfs_drop_inode will remove it from 3945 * btrfs_drop_inode will have it removed from
3650 * the inode cache when its usage count 3946 * the inode cache when its usage count
3651 * hits zero. 3947 * hits zero.
3652 */ 3948 */
@@ -3665,39 +3961,10 @@ again:
3665 return 0; 3961 return 0;
3666} 3962}
3667 3963
3668static noinline void init_btrfs_i(struct inode *inode)
3669{
3670 struct btrfs_inode *bi = BTRFS_I(inode);
3671
3672 bi->generation = 0;
3673 bi->sequence = 0;
3674 bi->last_trans = 0;
3675 bi->last_sub_trans = 0;
3676 bi->logged_trans = 0;
3677 bi->delalloc_bytes = 0;
3678 bi->reserved_bytes = 0;
3679 bi->disk_i_size = 0;
3680 bi->flags = 0;
3681 bi->index_cnt = (u64)-1;
3682 bi->last_unlink_trans = 0;
3683 bi->ordered_data_close = 0;
3684 extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS);
3685 extent_io_tree_init(&BTRFS_I(inode)->io_tree,
3686 inode->i_mapping, GFP_NOFS);
3687 extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree,
3688 inode->i_mapping, GFP_NOFS);
3689 INIT_LIST_HEAD(&BTRFS_I(inode)->delalloc_inodes);
3690 INIT_LIST_HEAD(&BTRFS_I(inode)->ordered_operations);
3691 RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node);
3692 btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree);
3693 mutex_init(&BTRFS_I(inode)->log_mutex);
3694}
3695
3696static int btrfs_init_locked_inode(struct inode *inode, void *p) 3964static int btrfs_init_locked_inode(struct inode *inode, void *p)
3697{ 3965{
3698 struct btrfs_iget_args *args = p; 3966 struct btrfs_iget_args *args = p;
3699 inode->i_ino = args->ino; 3967 inode->i_ino = args->ino;
3700 init_btrfs_i(inode);
3701 BTRFS_I(inode)->root = args->root; 3968 BTRFS_I(inode)->root = args->root;
3702 btrfs_set_inode_space_info(args->root, inode); 3969 btrfs_set_inode_space_info(args->root, inode);
3703 return 0; 3970 return 0;
@@ -3729,7 +3996,7 @@ static struct inode *btrfs_iget_locked(struct super_block *s,
3729 * Returns in *is_new if the inode was read from disk 3996 * Returns in *is_new if the inode was read from disk
3730 */ 3997 */
3731struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, 3998struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
3732 struct btrfs_root *root) 3999 struct btrfs_root *root, int *new)
3733{ 4000{
3734 struct inode *inode; 4001 struct inode *inode;
3735 4002
@@ -3744,6 +4011,8 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
3744 4011
3745 inode_tree_add(inode); 4012 inode_tree_add(inode);
3746 unlock_new_inode(inode); 4013 unlock_new_inode(inode);
4014 if (new)
4015 *new = 1;
3747 } 4016 }
3748 4017
3749 return inode; 4018 return inode;
@@ -3758,8 +4027,6 @@ static struct inode *new_simple_dir(struct super_block *s,
3758 if (!inode) 4027 if (!inode)
3759 return ERR_PTR(-ENOMEM); 4028 return ERR_PTR(-ENOMEM);
3760 4029
3761 init_btrfs_i(inode);
3762
3763 BTRFS_I(inode)->root = root; 4030 BTRFS_I(inode)->root = root;
3764 memcpy(&BTRFS_I(inode)->location, key, sizeof(*key)); 4031 memcpy(&BTRFS_I(inode)->location, key, sizeof(*key));
3765 BTRFS_I(inode)->dummy_inode = 1; 4032 BTRFS_I(inode)->dummy_inode = 1;
@@ -3796,7 +4063,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
3796 return NULL; 4063 return NULL;
3797 4064
3798 if (location.type == BTRFS_INODE_ITEM_KEY) { 4065 if (location.type == BTRFS_INODE_ITEM_KEY) {
3799 inode = btrfs_iget(dir->i_sb, &location, root); 4066 inode = btrfs_iget(dir->i_sb, &location, root, NULL);
3800 return inode; 4067 return inode;
3801 } 4068 }
3802 4069
@@ -3811,7 +4078,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
3811 else 4078 else
3812 inode = new_simple_dir(dir->i_sb, &location, sub_root); 4079 inode = new_simple_dir(dir->i_sb, &location, sub_root);
3813 } else { 4080 } else {
3814 inode = btrfs_iget(dir->i_sb, &location, sub_root); 4081 inode = btrfs_iget(dir->i_sb, &location, sub_root, NULL);
3815 } 4082 }
3816 srcu_read_unlock(&root->fs_info->subvol_srcu, index); 4083 srcu_read_unlock(&root->fs_info->subvol_srcu, index);
3817 4084
@@ -4010,16 +4277,16 @@ err:
4010 return ret; 4277 return ret;
4011} 4278}
4012 4279
4013int btrfs_write_inode(struct inode *inode, int wait) 4280int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
4014{ 4281{
4015 struct btrfs_root *root = BTRFS_I(inode)->root; 4282 struct btrfs_root *root = BTRFS_I(inode)->root;
4016 struct btrfs_trans_handle *trans; 4283 struct btrfs_trans_handle *trans;
4017 int ret = 0; 4284 int ret = 0;
4018 4285
4019 if (root->fs_info->btree_inode == inode) 4286 if (BTRFS_I(inode)->dummy_inode)
4020 return 0; 4287 return 0;
4021 4288
4022 if (wait) { 4289 if (wbc->sync_mode == WB_SYNC_ALL) {
4023 trans = btrfs_join_transaction(root, 1); 4290 trans = btrfs_join_transaction(root, 1);
4024 btrfs_set_trans_block_group(trans, inode); 4291 btrfs_set_trans_block_group(trans, inode);
4025 ret = btrfs_commit_transaction(trans, root); 4292 ret = btrfs_commit_transaction(trans, root);
@@ -4037,10 +4304,38 @@ void btrfs_dirty_inode(struct inode *inode)
4037{ 4304{
4038 struct btrfs_root *root = BTRFS_I(inode)->root; 4305 struct btrfs_root *root = BTRFS_I(inode)->root;
4039 struct btrfs_trans_handle *trans; 4306 struct btrfs_trans_handle *trans;
4307 int ret;
4308
4309 if (BTRFS_I(inode)->dummy_inode)
4310 return;
4040 4311
4041 trans = btrfs_join_transaction(root, 1); 4312 trans = btrfs_join_transaction(root, 1);
4042 btrfs_set_trans_block_group(trans, inode); 4313 btrfs_set_trans_block_group(trans, inode);
4043 btrfs_update_inode(trans, root, inode); 4314
4315 ret = btrfs_update_inode(trans, root, inode);
4316 if (ret && ret == -ENOSPC) {
4317 /* whoops, let's try again with the full transaction */
4318 btrfs_end_transaction(trans, root);
4319 trans = btrfs_start_transaction(root, 1);
4320 if (IS_ERR(trans)) {
4321 if (printk_ratelimit()) {
4322 printk(KERN_ERR "btrfs: fail to "
4323 "dirty inode %lu error %ld\n",
4324 inode->i_ino, PTR_ERR(trans));
4325 }
4326 return;
4327 }
4328 btrfs_set_trans_block_group(trans, inode);
4329
4330 ret = btrfs_update_inode(trans, root, inode);
4331 if (ret) {
4332 if (printk_ratelimit()) {
4333 printk(KERN_ERR "btrfs: fail to "
4334 "dirty inode %lu error %d\n",
4335 inode->i_ino, ret);
4336 }
4337 }
4338 }
4044 btrfs_end_transaction(trans, root); 4339 btrfs_end_transaction(trans, root);
4045} 4340}
4046 4341
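
btrfs_dirty_inode now tries the cheap joined transaction first and, only when the inode update fails with -ENOSPC, falls back to a full transaction start that carries its own reservation. The general shape is "try the lightweight path, retry once on a specific error with the heavyweight one"; a minimal sketch, with cheap_update and expensive_update as hypothetical stand-ins:

#include <errno.h>
#include <stdio.h>

static int cheap_update(void)      { return -ENOSPC; } /* hypothetical stand-in */
static int expensive_update(void)  { return 0; }       /* hypothetical stand-in */

int main(void)
{
    int ret = cheap_update();
    if (ret == -ENOSPC) {
        /* the cheap path had no reservation behind it; retry with one */
        ret = expensive_update();
        if (ret)
            fprintf(stderr, "update failed: %d\n", ret);
    }
    printf("ret=%d\n", ret);
    return ret ? 1 : 0;
}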
@@ -4158,7 +4453,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
4158 * btrfs_get_inode_index_count has an explanation for the magic 4453 * btrfs_get_inode_index_count has an explanation for the magic
4159 * number 4454 * number
4160 */ 4455 */
4161 init_btrfs_i(inode);
4162 BTRFS_I(inode)->index_cnt = 2; 4456 BTRFS_I(inode)->index_cnt = 2;
4163 BTRFS_I(inode)->root = root; 4457 BTRFS_I(inode)->root = root;
4164 BTRFS_I(inode)->generation = trans->transid; 4458 BTRFS_I(inode)->generation = trans->transid;
@@ -4187,16 +4481,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
4187 if (ret != 0) 4481 if (ret != 0)
4188 goto fail; 4482 goto fail;
4189 4483
4190 inode->i_uid = current_fsuid(); 4484 inode_init_owner(inode, dir, mode);
4191
4192 if (dir && (dir->i_mode & S_ISGID)) {
4193 inode->i_gid = dir->i_gid;
4194 if (S_ISDIR(mode))
4195 mode |= S_ISGID;
4196 } else
4197 inode->i_gid = current_fsgid();
4198
4199 inode->i_mode = mode;
4200 inode->i_ino = objectid; 4485 inode->i_ino = objectid;
4201 inode_set_bytes(inode, 0); 4486 inode_set_bytes(inode, 0);
4202 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 4487 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
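
The hunk above replaces the open-coded owner setup with inode_init_owner(), which applies the classic setgid-directory rule: the new inode takes the caller's fsuid, but if the parent directory has S_ISGID set it inherits the parent's gid, and a new directory also keeps S_ISGID. The removed lines spelled that rule out; here is the same decision as a standalone userspace illustration (plain POSIX mode bits, no kernel types, all names illustrative):

#include <stdio.h>
#include <sys/stat.h>

struct owner { unsigned uid, gid; mode_t mode; };

/* Same rule the removed open-coded block implemented. */
static struct owner init_owner(unsigned fsuid, unsigned fsgid,
                               mode_t dir_mode, unsigned dir_gid, mode_t mode)
{
    struct owner o = { fsuid, fsgid, mode };
    if (dir_mode & S_ISGID) {
        o.gid = dir_gid;                /* inherit group from the directory */
        if (S_ISDIR(mode))
            o.mode |= S_ISGID;          /* subdirectories stay setgid */
    }
    return o;
}

int main(void)
{
    struct owner o = init_owner(1000, 1000, S_IFDIR | S_ISGID | 0775, 500,
                                S_IFDIR | 0755);
    printf("uid=%u gid=%u setgid=%d\n", o.uid, o.gid, !!(o.mode & S_ISGID));
    return 0;
}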
@@ -4322,26 +4607,21 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
4322 if (!new_valid_dev(rdev)) 4607 if (!new_valid_dev(rdev))
4323 return -EINVAL; 4608 return -EINVAL;
4324 4609
4610 err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid);
4611 if (err)
4612 return err;
4613
4325 /* 4614 /*
4326 * 2 for inode item and ref 4615 * 2 for inode item and ref
4327 * 2 for dir items 4616 * 2 for dir items
4328 * 1 for xattr if selinux is on 4617 * 1 for xattr if selinux is on
4329 */ 4618 */
4330 err = btrfs_reserve_metadata_space(root, 5); 4619 trans = btrfs_start_transaction(root, 5);
4331 if (err) 4620 if (IS_ERR(trans))
4332 return err; 4621 return PTR_ERR(trans);
4333 4622
4334 trans = btrfs_start_transaction(root, 1);
4335 if (!trans)
4336 goto fail;
4337 btrfs_set_trans_block_group(trans, dir); 4623 btrfs_set_trans_block_group(trans, dir);
4338 4624
4339 err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
4340 if (err) {
4341 err = -ENOSPC;
4342 goto out_unlock;
4343 }
4344
4345 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, 4625 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
4346 dentry->d_name.len, 4626 dentry->d_name.len,
4347 dentry->d_parent->d_inode->i_ino, objectid, 4627 dentry->d_parent->d_inode->i_ino, objectid,
@@ -4370,13 +4650,11 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
4370out_unlock: 4650out_unlock:
4371 nr = trans->blocks_used; 4651 nr = trans->blocks_used;
4372 btrfs_end_transaction_throttle(trans, root); 4652 btrfs_end_transaction_throttle(trans, root);
4373fail: 4653 btrfs_btree_balance_dirty(root, nr);
4374 btrfs_unreserve_metadata_space(root, 5);
4375 if (drop_inode) { 4654 if (drop_inode) {
4376 inode_dec_link_count(inode); 4655 inode_dec_link_count(inode);
4377 iput(inode); 4656 iput(inode);
4378 } 4657 }
4379 btrfs_btree_balance_dirty(root, nr);
4380 return err; 4658 return err;
4381} 4659}
4382 4660
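
mknod, create, mkdir and symlink now pass the expected number of modified items straight to btrfs_start_transaction (five here: inode item plus ref, two dir items, one xattr for selinux), instead of wrapping a plain transaction start in a separate reserve/unreserve pair. A tiny sketch of budgeting by item count up front and giving the budget back in one place on exit; the names below are illustrative, not btrfs APIs.

#include <errno.h>
#include <stdio.h>

static long free_units = 8;

struct txn { int reserved; };

/* Reserve everything the operation may need before doing any of it. */
static int start_txn(struct txn *t, int items)
{
    if (free_units < items)
        return -ENOSPC;
    free_units -= items;
    t->reserved = items;
    return 0;
}

static void end_txn(struct txn *t)
{
    free_units += t->reserved;          /* single release point */
    t->reserved = 0;
}

int main(void)
{
    struct txn t;
    /* 2 for inode item and ref, 2 for dir items, 1 for xattr */
    if (start_txn(&t, 5))
        return 1;
    printf("reserved 5 units, %ld left\n", free_units);
    end_txn(&t);
    printf("released, %ld free again\n", free_units);
    return 0;
}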
@@ -4386,32 +4664,26 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
4386 struct btrfs_trans_handle *trans; 4664 struct btrfs_trans_handle *trans;
4387 struct btrfs_root *root = BTRFS_I(dir)->root; 4665 struct btrfs_root *root = BTRFS_I(dir)->root;
4388 struct inode *inode = NULL; 4666 struct inode *inode = NULL;
4389 int err;
4390 int drop_inode = 0; 4667 int drop_inode = 0;
4668 int err;
4391 unsigned long nr = 0; 4669 unsigned long nr = 0;
4392 u64 objectid; 4670 u64 objectid;
4393 u64 index = 0; 4671 u64 index = 0;
4394 4672
4673 err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid);
4674 if (err)
4675 return err;
4395 /* 4676 /*
4396 * 2 for inode item and ref 4677 * 2 for inode item and ref
4397 * 2 for dir items 4678 * 2 for dir items
4398 * 1 for xattr if selinux is on 4679 * 1 for xattr if selinux is on
4399 */ 4680 */
4400 err = btrfs_reserve_metadata_space(root, 5); 4681 trans = btrfs_start_transaction(root, 5);
4401 if (err) 4682 if (IS_ERR(trans))
4402 return err; 4683 return PTR_ERR(trans);
4403 4684
4404 trans = btrfs_start_transaction(root, 1);
4405 if (!trans)
4406 goto fail;
4407 btrfs_set_trans_block_group(trans, dir); 4685 btrfs_set_trans_block_group(trans, dir);
4408 4686
4409 err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
4410 if (err) {
4411 err = -ENOSPC;
4412 goto out_unlock;
4413 }
4414
4415 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, 4687 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
4416 dentry->d_name.len, 4688 dentry->d_name.len,
4417 dentry->d_parent->d_inode->i_ino, 4689 dentry->d_parent->d_inode->i_ino,
@@ -4443,8 +4715,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
4443out_unlock: 4715out_unlock:
4444 nr = trans->blocks_used; 4716 nr = trans->blocks_used;
4445 btrfs_end_transaction_throttle(trans, root); 4717 btrfs_end_transaction_throttle(trans, root);
4446fail:
4447 btrfs_unreserve_metadata_space(root, 5);
4448 if (drop_inode) { 4718 if (drop_inode) {
4449 inode_dec_link_count(inode); 4719 inode_dec_link_count(inode);
4450 iput(inode); 4720 iput(inode);
@@ -4471,21 +4741,21 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
4471 if (root->objectid != BTRFS_I(inode)->root->objectid) 4741 if (root->objectid != BTRFS_I(inode)->root->objectid)
4472 return -EPERM; 4742 return -EPERM;
4473 4743
4474 /*
4475 * 1 item for inode ref
4476 * 2 items for dir items
4477 */
4478 err = btrfs_reserve_metadata_space(root, 3);
4479 if (err)
4480 return err;
4481
4482 btrfs_inc_nlink(inode); 4744 btrfs_inc_nlink(inode);
4483 4745
4484 err = btrfs_set_inode_index(dir, &index); 4746 err = btrfs_set_inode_index(dir, &index);
4485 if (err) 4747 if (err)
4486 goto fail; 4748 goto fail;
4487 4749
4488 trans = btrfs_start_transaction(root, 1); 4750 /*
4751 * 1 item for inode ref
4752 * 2 items for dir items
4753 */
4754 trans = btrfs_start_transaction(root, 3);
4755 if (IS_ERR(trans)) {
4756 err = PTR_ERR(trans);
4757 goto fail;
4758 }
4489 4759
4490 btrfs_set_trans_block_group(trans, dir); 4760 btrfs_set_trans_block_group(trans, dir);
4491 atomic_inc(&inode->i_count); 4761 atomic_inc(&inode->i_count);
@@ -4504,7 +4774,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
4504 nr = trans->blocks_used; 4774 nr = trans->blocks_used;
4505 btrfs_end_transaction_throttle(trans, root); 4775 btrfs_end_transaction_throttle(trans, root);
4506fail: 4776fail:
4507 btrfs_unreserve_metadata_space(root, 3);
4508 if (drop_inode) { 4777 if (drop_inode) {
4509 inode_dec_link_count(inode); 4778 inode_dec_link_count(inode);
4510 iput(inode); 4779 iput(inode);
@@ -4524,28 +4793,20 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
4524 u64 index = 0; 4793 u64 index = 0;
4525 unsigned long nr = 1; 4794 unsigned long nr = 1;
4526 4795
4796 err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid);
4797 if (err)
4798 return err;
4799
4527 /* 4800 /*
4528 * 2 items for inode and ref 4801 * 2 items for inode and ref
4529 * 2 items for dir items 4802 * 2 items for dir items
4530 * 1 for xattr if selinux is on 4803 * 1 for xattr if selinux is on
4531 */ 4804 */
4532 err = btrfs_reserve_metadata_space(root, 5); 4805 trans = btrfs_start_transaction(root, 5);
4533 if (err) 4806 if (IS_ERR(trans))
4534 return err; 4807 return PTR_ERR(trans);
4535
4536 trans = btrfs_start_transaction(root, 1);
4537 if (!trans) {
4538 err = -ENOMEM;
4539 goto out_unlock;
4540 }
4541 btrfs_set_trans_block_group(trans, dir); 4808 btrfs_set_trans_block_group(trans, dir);
4542 4809
4543 err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
4544 if (err) {
4545 err = -ENOSPC;
4546 goto out_unlock;
4547 }
4548
4549 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, 4810 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
4550 dentry->d_name.len, 4811 dentry->d_name.len,
4551 dentry->d_parent->d_inode->i_ino, objectid, 4812 dentry->d_parent->d_inode->i_ino, objectid,
@@ -4585,9 +4846,6 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
4585out_fail: 4846out_fail:
4586 nr = trans->blocks_used; 4847 nr = trans->blocks_used;
4587 btrfs_end_transaction_throttle(trans, root); 4848 btrfs_end_transaction_throttle(trans, root);
4588
4589out_unlock:
4590 btrfs_unreserve_metadata_space(root, 5);
4591 if (drop_on_err) 4849 if (drop_on_err)
4592 iput(inode); 4850 iput(inode);
4593 btrfs_btree_balance_dirty(root, nr); 4851 btrfs_btree_balance_dirty(root, nr);
@@ -4845,6 +5103,7 @@ again:
4845 } 5103 }
4846 flush_dcache_page(page); 5104 flush_dcache_page(page);
4847 } else if (create && PageUptodate(page)) { 5105 } else if (create && PageUptodate(page)) {
5106 WARN_ON(1);
4848 if (!trans) { 5107 if (!trans) {
4849 kunmap(page); 5108 kunmap(page);
4850 free_extent_map(em); 5109 free_extent_map(em);
@@ -4941,11 +5200,651 @@ out:
4941 return em; 5200 return em;
4942} 5201}
4943 5202
5203static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
5204 u64 start, u64 len)
5205{
5206 struct btrfs_root *root = BTRFS_I(inode)->root;
5207 struct btrfs_trans_handle *trans;
5208 struct extent_map *em;
5209 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
5210 struct btrfs_key ins;
5211 u64 alloc_hint;
5212 int ret;
5213
5214 btrfs_drop_extent_cache(inode, start, start + len - 1, 0);
5215
5216 trans = btrfs_join_transaction(root, 0);
5217 if (!trans)
5218 return ERR_PTR(-ENOMEM);
5219
5220 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
5221
5222 alloc_hint = get_extent_allocation_hint(inode, start, len);
5223 ret = btrfs_reserve_extent(trans, root, len, root->sectorsize, 0,
5224 alloc_hint, (u64)-1, &ins, 1);
5225 if (ret) {
5226 em = ERR_PTR(ret);
5227 goto out;
5228 }
5229
5230 em = alloc_extent_map(GFP_NOFS);
5231 if (!em) {
5232 em = ERR_PTR(-ENOMEM);
5233 goto out;
5234 }
5235
5236 em->start = start;
5237 em->orig_start = em->start;
5238 em->len = ins.offset;
5239
5240 em->block_start = ins.objectid;
5241 em->block_len = ins.offset;
5242 em->bdev = root->fs_info->fs_devices->latest_bdev;
5243 set_bit(EXTENT_FLAG_PINNED, &em->flags);
5244
5245 while (1) {
5246 write_lock(&em_tree->lock);
5247 ret = add_extent_mapping(em_tree, em);
5248 write_unlock(&em_tree->lock);
5249 if (ret != -EEXIST)
5250 break;
5251 btrfs_drop_extent_cache(inode, start, start + em->len - 1, 0);
5252 }
5253
5254 ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid,
5255 ins.offset, ins.offset, 0);
5256 if (ret) {
5257 btrfs_free_reserved_extent(root, ins.objectid, ins.offset);
5258 em = ERR_PTR(ret);
5259 }
5260out:
5261 btrfs_end_transaction(trans, root);
5262 return em;
5263}
5264
5265/*
5266 * returns 1 when the nocow is safe, < 1 on error, 0 if the
5267 * block must be cow'd
5268 */
5269static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans,
5270 struct inode *inode, u64 offset, u64 len)
5271{
5272 struct btrfs_path *path;
5273 int ret;
5274 struct extent_buffer *leaf;
5275 struct btrfs_root *root = BTRFS_I(inode)->root;
5276 struct btrfs_file_extent_item *fi;
5277 struct btrfs_key key;
5278 u64 disk_bytenr;
5279 u64 backref_offset;
5280 u64 extent_end;
5281 u64 num_bytes;
5282 int slot;
5283 int found_type;
5284
5285 path = btrfs_alloc_path();
5286 if (!path)
5287 return -ENOMEM;
5288
5289 ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino,
5290 offset, 0);
5291 if (ret < 0)
5292 goto out;
5293
5294 slot = path->slots[0];
5295 if (ret == 1) {
5296 if (slot == 0) {
5297 /* can't find the item, must cow */
5298 ret = 0;
5299 goto out;
5300 }
5301 slot--;
5302 }
5303 ret = 0;
5304 leaf = path->nodes[0];
5305 btrfs_item_key_to_cpu(leaf, &key, slot);
5306 if (key.objectid != inode->i_ino ||
5307 key.type != BTRFS_EXTENT_DATA_KEY) {
5308 /* not our file or wrong item type, must cow */
5309 goto out;
5310 }
5311
5312 if (key.offset > offset) {
5313 /* Wrong offset, must cow */
5314 goto out;
5315 }
5316
5317 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
5318 found_type = btrfs_file_extent_type(leaf, fi);
5319 if (found_type != BTRFS_FILE_EXTENT_REG &&
5320 found_type != BTRFS_FILE_EXTENT_PREALLOC) {
5321 /* not a regular extent, must cow */
5322 goto out;
5323 }
5324 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
5325 backref_offset = btrfs_file_extent_offset(leaf, fi);
5326
5327 extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
5328 if (extent_end < offset + len) {
5329 /* extent doesn't include our full range, must cow */
5330 goto out;
5331 }
5332
5333 if (btrfs_extent_readonly(root, disk_bytenr))
5334 goto out;
5335
5336 /*
5337 * look for other files referencing this extent, if we
5338 * find any we must cow
5339 */
5340 if (btrfs_cross_ref_exist(trans, root, inode->i_ino,
5341 key.offset - backref_offset, disk_bytenr))
5342 goto out;
5343
5344 /*
5345 * adjust disk_bytenr and num_bytes to cover just the bytes
5346 * in this extent we are about to write. If there
5347 * are any csums in that range we have to cow in order
5348 * to keep the csums correct
5349 */
5350 disk_bytenr += backref_offset;
5351 disk_bytenr += offset - key.offset;
5352 num_bytes = min(offset + len, extent_end) - offset;
5353 if (csum_exist_in_range(root, disk_bytenr, num_bytes))
5354 goto out;
5355 /*
5356 * all of the above have passed, it is safe to overwrite this extent
5357 * without cow
5358 */
5359 ret = 1;
5360out:
5361 btrfs_free_path(path);
5362 return ret;
5363}
5364
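
can_nocow_odirect above answers a single question: may this O_DIRECT write go straight over the existing extent, or must it be copied? It only says yes when the item is a regular or prealloc extent that covers the whole requested range, the extent is not read-only or referenced by another file, and the byte range carries no checksums. Below is a condensed sketch of that checklist as one boolean function; every predicate is a hypothetical stub standing in for the corresponding btrfs lookup, not a real API.

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-ins for the on-disk lookups the real code performs. */
static bool extent_is_regular(void)        { return true; }
static bool extent_covers_range(void)      { return true; }
static bool extent_is_readonly(void)       { return false; }
static bool extent_is_shared(void)         { return false; }
static bool range_has_checksums(void)      { return false; }

/* 1 = safe to write in place (nocow), 0 = must copy-on-write. */
static int can_nocow(void)
{
    if (!extent_is_regular())   return 0;
    if (!extent_covers_range()) return 0;
    if (extent_is_readonly())   return 0;
    if (extent_is_shared())     return 0;   /* another file references it */
    if (range_has_checksums())  return 0;   /* overwriting would break csums */
    return 1;
}

int main(void)
{
    printf("nocow allowed: %d\n", can_nocow());
    return 0;
}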
5365static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
5366 struct buffer_head *bh_result, int create)
5367{
5368 struct extent_map *em;
5369 struct btrfs_root *root = BTRFS_I(inode)->root;
5370 u64 start = iblock << inode->i_blkbits;
5371 u64 len = bh_result->b_size;
5372 struct btrfs_trans_handle *trans;
5373
5374 em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
5375 if (IS_ERR(em))
5376 return PTR_ERR(em);
5377
5378 /*
5379 * Ok for INLINE and COMPRESSED extents we need to fallback on buffered
5380 * io. INLINE is special, and we could probably kludge it in here, but
5381 * it's still buffered so for safety let's just fall back to the generic
5382 * buffered path.
5383 *
5384 * For COMPRESSED we _have_ to read the entire extent in so we can
5385 * decompress it, so there will be buffering required no matter what we
5386 * do, so go ahead and fallback to buffered.
5387 *
5388 * We return -ENOTBLK because that's what makes DIO go ahead and go back
5389 * to buffered IO. Don't blame me, this is the price we pay for using
5390 * the generic code.
5391 */
5392 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) ||
5393 em->block_start == EXTENT_MAP_INLINE) {
5394 free_extent_map(em);
5395 return -ENOTBLK;
5396 }
5397
5398 /* Just a good old fashioned hole, return */
5399 if (!create && (em->block_start == EXTENT_MAP_HOLE ||
5400 test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
5401 free_extent_map(em);
5402 /* DIO will do one hole at a time, so just unlock a sector */
5403 unlock_extent(&BTRFS_I(inode)->io_tree, start,
5404 start + root->sectorsize - 1, GFP_NOFS);
5405 return 0;
5406 }
5407
5408 /*
5409 * We don't allocate a new extent in the following cases
5410 *
5411 * 1) The inode is marked as NODATACOW. In this case we'll just use the
5412 * existing extent.
5413 * 2) The extent is marked as PREALLOC. We're good to go here and can
5414 * just use the extent.
5415 *
5416 */
5417 if (!create) {
5418 len = em->len - (start - em->start);
5419 goto map;
5420 }
5421
5422 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) ||
5423 ((BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) &&
5424 em->block_start != EXTENT_MAP_HOLE)) {
5425 int type;
5426 int ret;
5427 u64 block_start;
5428
5429 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
5430 type = BTRFS_ORDERED_PREALLOC;
5431 else
5432 type = BTRFS_ORDERED_NOCOW;
5433 len = min(len, em->len - (start - em->start));
5434 block_start = em->block_start + (start - em->start);
5435
5436 /*
5437 * we're not going to log anything, but we do need
5438 * to make sure the current transaction stays open
5439 * while we look for nocow cross refs
5440 */
5441 trans = btrfs_join_transaction(root, 0);
5442 if (!trans)
5443 goto must_cow;
5444
5445 if (can_nocow_odirect(trans, inode, start, len) == 1) {
5446 ret = btrfs_add_ordered_extent_dio(inode, start,
5447 block_start, len, len, type);
5448 btrfs_end_transaction(trans, root);
5449 if (ret) {
5450 free_extent_map(em);
5451 return ret;
5452 }
5453 goto unlock;
5454 }
5455 btrfs_end_transaction(trans, root);
5456 }
5457must_cow:
5458 /*
5459 * this will cow the extent, reset the len in case we changed
5460 * it above
5461 */
5462 len = bh_result->b_size;
5463 free_extent_map(em);
5464 em = btrfs_new_extent_direct(inode, start, len);
5465 if (IS_ERR(em))
5466 return PTR_ERR(em);
5467 len = min(len, em->len - (start - em->start));
5468unlock:
5469 clear_extent_bit(&BTRFS_I(inode)->io_tree, start, start + len - 1,
5470 EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DIRTY, 1,
5471 0, NULL, GFP_NOFS);
5472map:
5473 bh_result->b_blocknr = (em->block_start + (start - em->start)) >>
5474 inode->i_blkbits;
5475 bh_result->b_size = len;
5476 bh_result->b_bdev = em->bdev;
5477 set_buffer_mapped(bh_result);
5478 if (create && !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
5479 set_buffer_new(bh_result);
5480
5481 free_extent_map(em);
5482
5483 return 0;
5484}
5485
5486struct btrfs_dio_private {
5487 struct inode *inode;
5488 u64 logical_offset;
5489 u64 disk_bytenr;
5490 u64 bytes;
5491 u32 *csums;
5492 void *private;
5493};
5494
5495static void btrfs_endio_direct_read(struct bio *bio, int err)
5496{
5497 struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1;
5498 struct bio_vec *bvec = bio->bi_io_vec;
5499 struct btrfs_dio_private *dip = bio->bi_private;
5500 struct inode *inode = dip->inode;
5501 struct btrfs_root *root = BTRFS_I(inode)->root;
5502 u64 start;
5503 u32 *private = dip->csums;
5504
5505 start = dip->logical_offset;
5506 do {
5507 if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
5508 struct page *page = bvec->bv_page;
5509 char *kaddr;
5510 u32 csum = ~(u32)0;
5511 unsigned long flags;
5512
5513 local_irq_save(flags);
5514 kaddr = kmap_atomic(page, KM_IRQ0);
5515 csum = btrfs_csum_data(root, kaddr + bvec->bv_offset,
5516 csum, bvec->bv_len);
5517 btrfs_csum_final(csum, (char *)&csum);
5518 kunmap_atomic(kaddr, KM_IRQ0);
5519 local_irq_restore(flags);
5520
5521 flush_dcache_page(bvec->bv_page);
5522 if (csum != *private) {
5523 printk(KERN_ERR "btrfs csum failed ino %lu off"
5524 " %llu csum %u private %u\n",
5525 inode->i_ino, (unsigned long long)start,
5526 csum, *private);
5527 err = -EIO;
5528 }
5529 }
5530
5531 start += bvec->bv_len;
5532 private++;
5533 bvec++;
5534 } while (bvec <= bvec_end);
5535
5536 unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset,
5537 dip->logical_offset + dip->bytes - 1, GFP_NOFS);
5538 bio->bi_private = dip->private;
5539
5540 kfree(dip->csums);
5541 kfree(dip);
5542 dio_end_io(bio, err);
5543}
5544
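
btrfs_endio_direct_read walks the bio one segment at a time, recomputes the data checksum for each segment, and compares it with the value saved in dip->csums, flagging -EIO on the first mismatch. The sketch below shows the same walk over a vector of buffers, with a trivial additive checksum standing in for the filesystem's CRC (purely illustrative; none of these names are btrfs functions).

#include <stdint.h>
#include <stdio.h>

struct seg { const unsigned char *buf; size_t len; };

static uint32_t toy_csum(const unsigned char *p, size_t len) /* stand-in */
{
    uint32_t c = 0;
    while (len--)
        c = c * 31 + *p++;
    return c;
}

int main(void)
{
    const unsigned char a[] = "hello", b[] = "world";
    struct seg segs[] = { { a, 5 }, { b, 5 } };
    uint32_t stored[] = { toy_csum(a, 5), toy_csum(b, 5) }; /* as written */
    int err = 0;

    for (size_t i = 0; i < 2; i++) {
        uint32_t c = toy_csum(segs[i].buf, segs[i].len);
        if (c != stored[i]) {
            fprintf(stderr, "csum mismatch in segment %zu\n", i);
            err = -5;                   /* -EIO */
            break;
        }
    }
    printf("verify result: %d\n", err);
    return err ? 1 : 0;
}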
5545static void btrfs_endio_direct_write(struct bio *bio, int err)
5546{
5547 struct btrfs_dio_private *dip = bio->bi_private;
5548 struct inode *inode = dip->inode;
5549 struct btrfs_root *root = BTRFS_I(inode)->root;
5550 struct btrfs_trans_handle *trans;
5551 struct btrfs_ordered_extent *ordered = NULL;
5552 struct extent_state *cached_state = NULL;
5553 int ret;
5554
5555 if (err)
5556 goto out_done;
5557
5558 ret = btrfs_dec_test_ordered_pending(inode, &ordered,
5559 dip->logical_offset, dip->bytes);
5560 if (!ret)
5561 goto out_done;
5562
5563 BUG_ON(!ordered);
5564
5565 trans = btrfs_join_transaction(root, 1);
5566 if (!trans) {
5567 err = -ENOMEM;
5568 goto out;
5569 }
5570 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
5571
5572 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) {
5573 ret = btrfs_ordered_update_i_size(inode, 0, ordered);
5574 if (!ret)
5575 ret = btrfs_update_inode(trans, root, inode);
5576 err = ret;
5577 goto out;
5578 }
5579
5580 lock_extent_bits(&BTRFS_I(inode)->io_tree, ordered->file_offset,
5581 ordered->file_offset + ordered->len - 1, 0,
5582 &cached_state, GFP_NOFS);
5583
5584 if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags)) {
5585 ret = btrfs_mark_extent_written(trans, inode,
5586 ordered->file_offset,
5587 ordered->file_offset +
5588 ordered->len);
5589 if (ret) {
5590 err = ret;
5591 goto out_unlock;
5592 }
5593 } else {
5594 ret = insert_reserved_file_extent(trans, inode,
5595 ordered->file_offset,
5596 ordered->start,
5597 ordered->disk_len,
5598 ordered->len,
5599 ordered->len,
5600 0, 0, 0,
5601 BTRFS_FILE_EXTENT_REG);
5602 unpin_extent_cache(&BTRFS_I(inode)->extent_tree,
5603 ordered->file_offset, ordered->len);
5604 if (ret) {
5605 err = ret;
5606 WARN_ON(1);
5607 goto out_unlock;
5608 }
5609 }
5610
5611 add_pending_csums(trans, inode, ordered->file_offset, &ordered->list);
5612 btrfs_ordered_update_i_size(inode, 0, ordered);
5613 btrfs_update_inode(trans, root, inode);
5614out_unlock:
5615 unlock_extent_cached(&BTRFS_I(inode)->io_tree, ordered->file_offset,
5616 ordered->file_offset + ordered->len - 1,
5617 &cached_state, GFP_NOFS);
5618out:
5619 btrfs_delalloc_release_metadata(inode, ordered->len);
5620 btrfs_end_transaction(trans, root);
5621 btrfs_put_ordered_extent(ordered);
5622 btrfs_put_ordered_extent(ordered);
5623out_done:
5624 bio->bi_private = dip->private;
5625
5626 kfree(dip->csums);
5627 kfree(dip);
5628 dio_end_io(bio, err);
5629}
5630
5631static int __btrfs_submit_bio_start_direct_io(struct inode *inode, int rw,
5632 struct bio *bio, int mirror_num,
5633 unsigned long bio_flags, u64 offset)
5634{
5635 int ret;
5636 struct btrfs_root *root = BTRFS_I(inode)->root;
5637 ret = btrfs_csum_one_bio(root, inode, bio, offset, 1);
5638 BUG_ON(ret);
5639 return 0;
5640}
5641
5642static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
5643 loff_t file_offset)
5644{
5645 struct btrfs_root *root = BTRFS_I(inode)->root;
5646 struct btrfs_dio_private *dip;
5647 struct bio_vec *bvec = bio->bi_io_vec;
5648 u64 start;
5649 int skip_sum;
5650 int write = rw & REQ_WRITE;
5651 int ret = 0;
5652
5653 skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
5654
5655 dip = kmalloc(sizeof(*dip), GFP_NOFS);
5656 if (!dip) {
5657 ret = -ENOMEM;
5658 goto free_ordered;
5659 }
5660 dip->csums = NULL;
5661
5662 if (!skip_sum) {
5663 dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS);
5664 if (!dip->csums) {
5665 ret = -ENOMEM;
5666 goto free_ordered;
5667 }
5668 }
5669
5670 dip->private = bio->bi_private;
5671 dip->inode = inode;
5672 dip->logical_offset = file_offset;
5673
5674 start = dip->logical_offset;
5675 dip->bytes = 0;
5676 do {
5677 dip->bytes += bvec->bv_len;
5678 bvec++;
5679 } while (bvec <= (bio->bi_io_vec + bio->bi_vcnt - 1));
5680
5681 dip->disk_bytenr = (u64)bio->bi_sector << 9;
5682 bio->bi_private = dip;
5683
5684 if (write)
5685 bio->bi_end_io = btrfs_endio_direct_write;
5686 else
5687 bio->bi_end_io = btrfs_endio_direct_read;
5688
5689 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
5690 if (ret)
5691 goto out_err;
5692
5693 if (write && !skip_sum) {
5694 ret = btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
5695 inode, rw, bio, 0, 0,
5696 dip->logical_offset,
5697 __btrfs_submit_bio_start_direct_io,
5698 __btrfs_submit_bio_done);
5699 if (ret)
5700 goto out_err;
5701 return;
5702 } else if (!skip_sum)
5703 btrfs_lookup_bio_sums_dio(root, inode, bio,
5704 dip->logical_offset, dip->csums);
5705
5706 ret = btrfs_map_bio(root, rw, bio, 0, 1);
5707 if (ret)
5708 goto out_err;
5709 return;
5710out_err:
5711 kfree(dip->csums);
5712 kfree(dip);
5713free_ordered:
5714 /*
5715 * If this is a write, we need to clean up the reserved space and kill
5716 * the ordered extent.
5717 */
5718 if (write) {
5719 struct btrfs_ordered_extent *ordered;
5720 ordered = btrfs_lookup_ordered_extent(inode,
5721 dip->logical_offset);
5722 if (!test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags) &&
5723 !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags))
5724 btrfs_free_reserved_extent(root, ordered->start,
5725 ordered->disk_len);
5726 btrfs_put_ordered_extent(ordered);
5727 btrfs_put_ordered_extent(ordered);
5728 }
5729 bio_endio(bio, ret);
5730}
5731
5732static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *iocb,
5733 const struct iovec *iov, loff_t offset,
5734 unsigned long nr_segs)
5735{
5736 int seg;
5737 size_t size;
5738 unsigned long addr;
5739 unsigned blocksize_mask = root->sectorsize - 1;
5740 ssize_t retval = -EINVAL;
5741 loff_t end = offset;
5742
5743 if (offset & blocksize_mask)
5744 goto out;
5745
5746 /* Check the memory alignment. Blocks cannot straddle pages */
5747 for (seg = 0; seg < nr_segs; seg++) {
5748 addr = (unsigned long)iov[seg].iov_base;
5749 size = iov[seg].iov_len;
5750 end += size;
5751 if ((addr & blocksize_mask) || (size & blocksize_mask))
5752 goto out;
5753 }
5754 retval = 0;
5755out:
5756 return retval;
5757}
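
check_direct_IO rejects a direct I/O request unless the file offset and every iovec's base address and length are all multiples of the sector size, using the same & (sectorsize - 1) test. Here is a standalone version of that check over a plain iovec array, assuming a 4096-byte sector size for the example (the function name and the fixed block size are the only assumptions):

#include <stdio.h>
#include <stdint.h>
#include <sys/types.h>
#include <sys/uio.h>

/* Return 0 when the request is fully sector-aligned, -EINVAL otherwise. */
static int check_dio_alignment(off_t offset, const struct iovec *iov,
                               unsigned long nr_segs, unsigned blocksize)
{
    unsigned mask = blocksize - 1;

    if ((uint64_t)offset & mask)
        return -22;                     /* -EINVAL */
    for (unsigned long seg = 0; seg < nr_segs; seg++) {
        if (((uintptr_t)iov[seg].iov_base & mask) ||
            (iov[seg].iov_len & mask))
            return -22;                 /* block straddles a sector boundary */
    }
    return 0;
}

int main(void)
{
    static char buf[8192] __attribute__((aligned(4096)));
    struct iovec iov = { buf, 4096 };
    printf("aligned request:   %d\n", check_dio_alignment(0, &iov, 1, 4096));
    iov.iov_len = 100;                  /* not a multiple of the sector */
    printf("unaligned request: %d\n", check_dio_alignment(0, &iov, 1, 4096));
    return 0;
}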
4944static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, 5758static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
4945 const struct iovec *iov, loff_t offset, 5759 const struct iovec *iov, loff_t offset,
4946 unsigned long nr_segs) 5760 unsigned long nr_segs)
4947{ 5761{
4948 return -EINVAL; 5762 struct file *file = iocb->ki_filp;
5763 struct inode *inode = file->f_mapping->host;
5764 struct btrfs_ordered_extent *ordered;
5765 struct extent_state *cached_state = NULL;
5766 u64 lockstart, lockend;
5767 ssize_t ret;
5768 int writing = rw & WRITE;
5769 int write_bits = 0;
5770 size_t count = iov_length(iov, nr_segs);
5771
5772 if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iov,
5773 offset, nr_segs)) {
5774 return 0;
5775 }
5776
5777 lockstart = offset;
5778 lockend = offset + count - 1;
5779
5780 if (writing) {
5781 ret = btrfs_delalloc_reserve_space(inode, count);
5782 if (ret)
5783 goto out;
5784 }
5785
5786 while (1) {
5787 lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
5788 0, &cached_state, GFP_NOFS);
5789 /*
5790 * We're concerned with the entire range that we're going to be
5791 * doing DIO to, so we need to make sure there's no ordered
5792 * extents in this range.
5793 */
5794 ordered = btrfs_lookup_ordered_range(inode, lockstart,
5795 lockend - lockstart + 1);
5796 if (!ordered)
5797 break;
5798 unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
5799 &cached_state, GFP_NOFS);
5800 btrfs_start_ordered_extent(inode, ordered, 1);
5801 btrfs_put_ordered_extent(ordered);
5802 cond_resched();
5803 }
5804
5805 /*
5806 * we don't use btrfs_set_extent_delalloc because we don't want
5807 * the dirty or uptodate bits
5808 */
5809 if (writing) {
5810 write_bits = EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING;
5811 ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
5812 EXTENT_DELALLOC, 0, NULL, &cached_state,
5813 GFP_NOFS);
5814 if (ret) {
5815 clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
5816 lockend, EXTENT_LOCKED | write_bits,
5817 1, 0, &cached_state, GFP_NOFS);
5818 goto out;
5819 }
5820 }
5821
5822 free_extent_state(cached_state);
5823 cached_state = NULL;
5824
5825 ret = __blockdev_direct_IO(rw, iocb, inode,
5826 BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev,
5827 iov, offset, nr_segs, btrfs_get_blocks_direct, NULL,
5828 btrfs_submit_direct, 0);
5829
5830 if (ret < 0 && ret != -EIOCBQUEUED) {
5831 clear_extent_bit(&BTRFS_I(inode)->io_tree, offset,
5832 offset + iov_length(iov, nr_segs) - 1,
5833 EXTENT_LOCKED | write_bits, 1, 0,
5834 &cached_state, GFP_NOFS);
5835 } else if (ret >= 0 && ret < iov_length(iov, nr_segs)) {
5836 /*
5837 * We're falling back to buffered, unlock the section we didn't
5838 * do IO on.
5839 */
5840 clear_extent_bit(&BTRFS_I(inode)->io_tree, offset + ret,
5841 offset + iov_length(iov, nr_segs) - 1,
5842 EXTENT_LOCKED | write_bits, 1, 0,
5843 &cached_state, GFP_NOFS);
5844 }
5845out:
5846 free_extent_state(cached_state);
5847 return ret;
4949} 5848}
4950 5849
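
At the end of btrfs_direct_IO, a short or failed __blockdev_direct_IO leaves part of the locked range untouched, so the code clears the lock (and the write bits) either over the whole range or only over the tail it never got to, starting at offset + ret, before falling back to buffered IO. A small arithmetic sketch of that leftover-range computation, with example numbers chosen only for illustration:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    uint64_t offset = 0;
    uint64_t count  = 1 << 20;          /* 1 MiB request */
    int64_t  ret    = 256 * 1024;       /* direct IO only completed 256 KiB */

    uint64_t lockend = offset + count - 1;
    if (ret >= 0 && (uint64_t)ret < count) {
        /* fall back to buffered for the rest: unlock what was not written */
        uint64_t clear_start = offset + (uint64_t)ret;
        printf("unlock [%llu, %llu]\n",
               (unsigned long long)clear_start,
               (unsigned long long)lockend);
    }
    return 0;
}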
4951static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 5850static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
@@ -5021,6 +5920,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
5021{ 5920{
5022 struct extent_io_tree *tree; 5921 struct extent_io_tree *tree;
5023 struct btrfs_ordered_extent *ordered; 5922 struct btrfs_ordered_extent *ordered;
5923 struct extent_state *cached_state = NULL;
5024 u64 page_start = page_offset(page); 5924 u64 page_start = page_offset(page);
5025 u64 page_end = page_start + PAGE_CACHE_SIZE - 1; 5925 u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
5026 5926
@@ -5039,7 +5939,8 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
5039 btrfs_releasepage(page, GFP_NOFS); 5939 btrfs_releasepage(page, GFP_NOFS);
5040 return; 5940 return;
5041 } 5941 }
5042 lock_extent(tree, page_start, page_end, GFP_NOFS); 5942 lock_extent_bits(tree, page_start, page_end, 0, &cached_state,
5943 GFP_NOFS);
5043 ordered = btrfs_lookup_ordered_extent(page->mapping->host, 5944 ordered = btrfs_lookup_ordered_extent(page->mapping->host,
5044 page_offset(page)); 5945 page_offset(page));
5045 if (ordered) { 5946 if (ordered) {
@@ -5050,7 +5951,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
5050 clear_extent_bit(tree, page_start, page_end, 5951 clear_extent_bit(tree, page_start, page_end,
5051 EXTENT_DIRTY | EXTENT_DELALLOC | 5952 EXTENT_DIRTY | EXTENT_DELALLOC |
5052 EXTENT_LOCKED | EXTENT_DO_ACCOUNTING, 1, 0, 5953 EXTENT_LOCKED | EXTENT_DO_ACCOUNTING, 1, 0,
5053 NULL, GFP_NOFS); 5954 &cached_state, GFP_NOFS);
5054 /* 5955 /*
5055 * whoever cleared the private bit is responsible 5956 * whoever cleared the private bit is responsible
5056 * for the finish_ordered_io 5957 * for the finish_ordered_io
@@ -5060,11 +5961,13 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
5060 page_start, page_end); 5961 page_start, page_end);
5061 } 5962 }
5062 btrfs_put_ordered_extent(ordered); 5963 btrfs_put_ordered_extent(ordered);
5063 lock_extent(tree, page_start, page_end, GFP_NOFS); 5964 cached_state = NULL;
5965 lock_extent_bits(tree, page_start, page_end, 0, &cached_state,
5966 GFP_NOFS);
5064 } 5967 }
5065 clear_extent_bit(tree, page_start, page_end, 5968 clear_extent_bit(tree, page_start, page_end,
5066 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC | 5969 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC |
5067 EXTENT_DO_ACCOUNTING, 1, 1, NULL, GFP_NOFS); 5970 EXTENT_DO_ACCOUNTING, 1, 1, &cached_state, GFP_NOFS);
5068 __btrfs_releasepage(page, GFP_NOFS); 5971 __btrfs_releasepage(page, GFP_NOFS);
5069 5972
5070 ClearPageChecked(page); 5973 ClearPageChecked(page);
@@ -5097,6 +6000,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
5097 struct btrfs_root *root = BTRFS_I(inode)->root; 6000 struct btrfs_root *root = BTRFS_I(inode)->root;
5098 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 6001 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
5099 struct btrfs_ordered_extent *ordered; 6002 struct btrfs_ordered_extent *ordered;
6003 struct extent_state *cached_state = NULL;
5100 char *kaddr; 6004 char *kaddr;
5101 unsigned long zero_start; 6005 unsigned long zero_start;
5102 loff_t size; 6006 loff_t size;
@@ -5104,7 +6008,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
5104 u64 page_start; 6008 u64 page_start;
5105 u64 page_end; 6009 u64 page_end;
5106 6010
5107 ret = btrfs_check_data_free_space(root, inode, PAGE_CACHE_SIZE); 6011 ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
5108 if (ret) { 6012 if (ret) {
5109 if (ret == -ENOMEM) 6013 if (ret == -ENOMEM)
5110 ret = VM_FAULT_OOM; 6014 ret = VM_FAULT_OOM;
@@ -5113,13 +6017,6 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
5113 goto out; 6017 goto out;
5114 } 6018 }
5115 6019
5116 ret = btrfs_reserve_metadata_for_delalloc(root, inode, 1);
5117 if (ret) {
5118 btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE);
5119 ret = VM_FAULT_SIGBUS;
5120 goto out;
5121 }
5122
5123 ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */ 6020 ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */
5124again: 6021again:
5125 lock_page(page); 6022 lock_page(page);
@@ -5129,13 +6026,13 @@ again:
5129 6026
5130 if ((page->mapping != inode->i_mapping) || 6027 if ((page->mapping != inode->i_mapping) ||
5131 (page_start >= size)) { 6028 (page_start >= size)) {
5132 btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE);
5133 /* page got truncated out from underneath us */ 6029 /* page got truncated out from underneath us */
5134 goto out_unlock; 6030 goto out_unlock;
5135 } 6031 }
5136 wait_on_page_writeback(page); 6032 wait_on_page_writeback(page);
5137 6033
5138 lock_extent(io_tree, page_start, page_end, GFP_NOFS); 6034 lock_extent_bits(io_tree, page_start, page_end, 0, &cached_state,
6035 GFP_NOFS);
5139 set_page_extent_mapped(page); 6036 set_page_extent_mapped(page);
5140 6037
5141 /* 6038 /*
@@ -5144,7 +6041,8 @@ again:
5144 */ 6041 */
5145 ordered = btrfs_lookup_ordered_extent(inode, page_start); 6042 ordered = btrfs_lookup_ordered_extent(inode, page_start);
5146 if (ordered) { 6043 if (ordered) {
5147 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 6044 unlock_extent_cached(io_tree, page_start, page_end,
6045 &cached_state, GFP_NOFS);
5148 unlock_page(page); 6046 unlock_page(page);
5149 btrfs_start_ordered_extent(inode, ordered, 1); 6047 btrfs_start_ordered_extent(inode, ordered, 1);
5150 btrfs_put_ordered_extent(ordered); 6048 btrfs_put_ordered_extent(ordered);
@@ -5158,15 +6056,16 @@ again:
5158 * is probably a better way to do this, but for now keep consistent with 6056 * is probably a better way to do this, but for now keep consistent with
5159 * prepare_pages in the normal write path. 6057 * prepare_pages in the normal write path.
5160 */ 6058 */
5161 clear_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end, 6059 clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, page_end,
5162 EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING, 6060 EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING,
5163 GFP_NOFS); 6061 0, 0, &cached_state, GFP_NOFS);
5164 6062
5165 ret = btrfs_set_extent_delalloc(inode, page_start, page_end); 6063 ret = btrfs_set_extent_delalloc(inode, page_start, page_end,
6064 &cached_state);
5166 if (ret) { 6065 if (ret) {
5167 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 6066 unlock_extent_cached(io_tree, page_start, page_end,
6067 &cached_state, GFP_NOFS);
5168 ret = VM_FAULT_SIGBUS; 6068 ret = VM_FAULT_SIGBUS;
5169 btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE);
5170 goto out_unlock; 6069 goto out_unlock;
5171 } 6070 }
5172 ret = 0; 6071 ret = 0;
@@ -5190,13 +6089,13 @@ again:
5190 BTRFS_I(inode)->last_trans = root->fs_info->generation; 6089 BTRFS_I(inode)->last_trans = root->fs_info->generation;
5191 BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid; 6090 BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid;
5192 6091
5193 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 6092 unlock_extent_cached(io_tree, page_start, page_end, &cached_state, GFP_NOFS);
5194 6093
5195out_unlock: 6094out_unlock:
5196 btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
5197 if (!ret) 6095 if (!ret)
5198 return VM_FAULT_LOCKED; 6096 return VM_FAULT_LOCKED;
5199 unlock_page(page); 6097 unlock_page(page);
6098 btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
5200out: 6099out:
5201 return ret; 6100 return ret;
5202} 6101}
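
btrfs_page_mkwrite now makes a single btrfs_delalloc_reserve_space call for the page up front and, instead of sprinkling matching frees over every error branch, releases the space once on the failure path just before returning. Below is a sketch of that "reserve early, release in one place" discipline with an illustrative counter; reserve, release and fault_in_page are made up for the example, not btrfs APIs.

#include <stdio.h>

static long reserved;

static int reserve(long n)  { reserved += n; return 0; }
static void release(long n) { reserved -= n; }

/* Returns 0 on success; any failure releases the reservation exactly once. */
static int fault_in_page(int fail_step)
{
    int ret = 0;

    if (reserve(4096))
        return -1;
    if (fail_step == 1) { ret = -1; goto out_release; }  /* page truncated */
    if (fail_step == 2) { ret = -1; goto out_release; }  /* delalloc failed */
    return 0;                        /* success keeps the reservation */

out_release:
    release(4096);
    return ret;
}

int main(void)
{
    fault_in_page(2);
    printf("after failed fault, reserved=%ld\n", reserved);   /* 0 */
    fault_in_page(0);
    printf("after good fault, reserved=%ld\n", reserved);     /* 4096 */
    return 0;
}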
@@ -5221,8 +6120,10 @@ static void btrfs_truncate(struct inode *inode)
5221 btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); 6120 btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1);
5222 btrfs_ordered_update_i_size(inode, inode->i_size, NULL); 6121 btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
5223 6122
5224 trans = btrfs_start_transaction(root, 1); 6123 trans = btrfs_start_transaction(root, 0);
6124 BUG_ON(IS_ERR(trans));
5225 btrfs_set_trans_block_group(trans, inode); 6125 btrfs_set_trans_block_group(trans, inode);
6126 trans->block_rsv = root->orphan_block_rsv;
5226 6127
5227 /* 6128 /*
5228 * setattr is responsible for setting the ordered_data_close flag, 6129 * setattr is responsible for setting the ordered_data_close flag,
@@ -5245,6 +6146,23 @@ static void btrfs_truncate(struct inode *inode)
5245 btrfs_add_ordered_operation(trans, root, inode); 6146 btrfs_add_ordered_operation(trans, root, inode);
5246 6147
5247 while (1) { 6148 while (1) {
6149 if (!trans) {
6150 trans = btrfs_start_transaction(root, 0);
6151 BUG_ON(IS_ERR(trans));
6152 btrfs_set_trans_block_group(trans, inode);
6153 trans->block_rsv = root->orphan_block_rsv;
6154 }
6155
6156 ret = btrfs_block_rsv_check(trans, root,
6157 root->orphan_block_rsv, 0, 5);
6158 if (ret) {
6159 BUG_ON(ret != -EAGAIN);
6160 ret = btrfs_commit_transaction(trans, root);
6161 BUG_ON(ret);
6162 trans = NULL;
6163 continue;
6164 }
6165
5248 ret = btrfs_truncate_inode_items(trans, root, inode, 6166 ret = btrfs_truncate_inode_items(trans, root, inode,
5249 inode->i_size, 6167 inode->i_size,
5250 BTRFS_EXTENT_DATA_KEY); 6168 BTRFS_EXTENT_DATA_KEY);
@@ -5256,10 +6174,8 @@ static void btrfs_truncate(struct inode *inode)
5256 6174
5257 nr = trans->blocks_used; 6175 nr = trans->blocks_used;
5258 btrfs_end_transaction(trans, root); 6176 btrfs_end_transaction(trans, root);
6177 trans = NULL;
5259 btrfs_btree_balance_dirty(root, nr); 6178 btrfs_btree_balance_dirty(root, nr);
5260
5261 trans = btrfs_start_transaction(root, 1);
5262 btrfs_set_trans_block_group(trans, inode);
5263 } 6179 }
5264 6180
5265 if (ret == 0 && inode->i_nlink > 0) { 6181 if (ret == 0 && inode->i_nlink > 0) {
@@ -5320,21 +6236,47 @@ unsigned long btrfs_force_ra(struct address_space *mapping,
5320struct inode *btrfs_alloc_inode(struct super_block *sb) 6236struct inode *btrfs_alloc_inode(struct super_block *sb)
5321{ 6237{
5322 struct btrfs_inode *ei; 6238 struct btrfs_inode *ei;
6239 struct inode *inode;
5323 6240
5324 ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS); 6241 ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS);
5325 if (!ei) 6242 if (!ei)
5326 return NULL; 6243 return NULL;
6244
6245 ei->root = NULL;
6246 ei->space_info = NULL;
6247 ei->generation = 0;
6248 ei->sequence = 0;
5327 ei->last_trans = 0; 6249 ei->last_trans = 0;
5328 ei->last_sub_trans = 0; 6250 ei->last_sub_trans = 0;
5329 ei->logged_trans = 0; 6251 ei->logged_trans = 0;
5330 ei->outstanding_extents = 0; 6252 ei->delalloc_bytes = 0;
5331 ei->reserved_extents = 0; 6253 ei->reserved_bytes = 0;
5332 ei->root = NULL; 6254 ei->disk_i_size = 0;
6255 ei->flags = 0;
6256 ei->index_cnt = (u64)-1;
6257 ei->last_unlink_trans = 0;
6258
5333 spin_lock_init(&ei->accounting_lock); 6259 spin_lock_init(&ei->accounting_lock);
6260 atomic_set(&ei->outstanding_extents, 0);
6261 ei->reserved_extents = 0;
6262
6263 ei->ordered_data_close = 0;
6264 ei->orphan_meta_reserved = 0;
6265 ei->dummy_inode = 0;
6266 ei->force_compress = 0;
6267
6268 inode = &ei->vfs_inode;
6269 extent_map_tree_init(&ei->extent_tree, GFP_NOFS);
6270 extent_io_tree_init(&ei->io_tree, &inode->i_data, GFP_NOFS);
6271 extent_io_tree_init(&ei->io_failure_tree, &inode->i_data, GFP_NOFS);
6272 mutex_init(&ei->log_mutex);
5334 btrfs_ordered_inode_tree_init(&ei->ordered_tree); 6273 btrfs_ordered_inode_tree_init(&ei->ordered_tree);
5335 INIT_LIST_HEAD(&ei->i_orphan); 6274 INIT_LIST_HEAD(&ei->i_orphan);
6275 INIT_LIST_HEAD(&ei->delalloc_inodes);
5336 INIT_LIST_HEAD(&ei->ordered_operations); 6276 INIT_LIST_HEAD(&ei->ordered_operations);
5337 return &ei->vfs_inode; 6277 RB_CLEAR_NODE(&ei->rb_node);
6278
6279 return inode;
5338} 6280}
5339 6281
5340void btrfs_destroy_inode(struct inode *inode) 6282void btrfs_destroy_inode(struct inode *inode)
@@ -5344,6 +6286,8 @@ void btrfs_destroy_inode(struct inode *inode)
5344 6286
5345 WARN_ON(!list_empty(&inode->i_dentry)); 6287 WARN_ON(!list_empty(&inode->i_dentry));
5346 WARN_ON(inode->i_data.nrpages); 6288 WARN_ON(inode->i_data.nrpages);
6289 WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents));
6290 WARN_ON(BTRFS_I(inode)->reserved_extents);
5347 6291
5348 /* 6292 /*
5349 * This can happen where we create an inode, but somebody else also 6293 * This can happen where we create an inode, but somebody else also
@@ -5364,13 +6308,13 @@ void btrfs_destroy_inode(struct inode *inode)
5364 spin_unlock(&root->fs_info->ordered_extent_lock); 6308 spin_unlock(&root->fs_info->ordered_extent_lock);
5365 } 6309 }
5366 6310
5367 spin_lock(&root->list_lock); 6311 spin_lock(&root->orphan_lock);
5368 if (!list_empty(&BTRFS_I(inode)->i_orphan)) { 6312 if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
5369 printk(KERN_INFO "BTRFS: inode %lu still on the orphan list\n", 6313 printk(KERN_INFO "BTRFS: inode %lu still on the orphan list\n",
5370 inode->i_ino); 6314 inode->i_ino);
5371 list_del_init(&BTRFS_I(inode)->i_orphan); 6315 list_del_init(&BTRFS_I(inode)->i_orphan);
5372 } 6316 }
5373 spin_unlock(&root->list_lock); 6317 spin_unlock(&root->orphan_lock);
5374 6318
5375 while (1) { 6319 while (1) {
5376 ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1); 6320 ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1);
@@ -5392,14 +6336,14 @@ free:
5392 kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); 6336 kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
5393} 6337}
5394 6338
5395void btrfs_drop_inode(struct inode *inode) 6339int btrfs_drop_inode(struct inode *inode)
5396{ 6340{
5397 struct btrfs_root *root = BTRFS_I(inode)->root; 6341 struct btrfs_root *root = BTRFS_I(inode)->root;
5398 6342
5399 if (inode->i_nlink > 0 && btrfs_root_refs(&root->root_item) == 0) 6343 if (btrfs_root_refs(&root->root_item) == 0)
5400 generic_delete_inode(inode); 6344 return 1;
5401 else 6345 else
5402 generic_drop_inode(inode); 6346 return generic_drop_inode(inode);
5403} 6347}
5404 6348
5405static void init_once(void *foo) 6349static void init_once(void *foo)
@@ -5492,19 +6436,6 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
5492 if (S_ISDIR(old_inode->i_mode) && new_inode && 6436 if (S_ISDIR(old_inode->i_mode) && new_inode &&
5493 new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) 6437 new_inode->i_size > BTRFS_EMPTY_DIR_SIZE)
5494 return -ENOTEMPTY; 6438 return -ENOTEMPTY;
5495
5496 /*
5497 * We want to reserve the absolute worst case amount of items. So if
5498 * both inodes are subvols and we need to unlink them then that would
5499 * require 4 item modifications, but if they are both normal inodes it
5500 * would require 5 item modifications, so we'll assume their normal
5501 * inodes. So 5 * 2 is 10, plus 1 for the new link, so 11 total items
5502 * should cover the worst case number of items we'll modify.
5503 */
5504 ret = btrfs_reserve_metadata_space(root, 11);
5505 if (ret)
5506 return ret;
5507
5508 /* 6439 /*
5509 * we're using rename to replace one file with another. 6440 * we're using rename to replace one file with another.
5510 * and the replacement file is large. Start IO on it now so 6441 * and the replacement file is large. Start IO on it now so
@@ -5517,8 +6448,18 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
5517 /* close the racy window with snapshot create/destroy ioctl */ 6448 /* close the racy window with snapshot create/destroy ioctl */
5518 if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) 6449 if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)
5519 down_read(&root->fs_info->subvol_sem); 6450 down_read(&root->fs_info->subvol_sem);
6451 /*
6452 * We want to reserve the absolute worst case amount of items. So if
6453 * both inodes are subvols and we need to unlink them then that would
6454 * require 4 item modifications, but if they are both normal inodes it
6455 * would require 5 item modifications, so we'll assume their normal
6456 * inodes. So 5 * 2 is 10, plus 1 for the new link, so 11 total items
6457 * should cover the worst case number of items we'll modify.
6458 */
6459 trans = btrfs_start_transaction(root, 20);
6460 if (IS_ERR(trans))
6461 return PTR_ERR(trans);
5520 6462
5521 trans = btrfs_start_transaction(root, 1);
5522 btrfs_set_trans_block_group(trans, new_dir); 6463 btrfs_set_trans_block_group(trans, new_dir);
5523 6464
5524 if (dest != root) 6465 if (dest != root)
@@ -5617,7 +6558,6 @@ out_fail:
5617 if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) 6558 if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)
5618 up_read(&root->fs_info->subvol_sem); 6559 up_read(&root->fs_info->subvol_sem);
5619 6560
5620 btrfs_unreserve_metadata_space(root, 11);
5621 return ret; 6561 return ret;
5622} 6562}
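The removed btrfs_reserve_metadata_space()/btrfs_unreserve_metadata_space() pair is superseded by passing an item count straight to btrfs_start_transaction(), which now performs the metadata reservation itself and can therefore fail with an ERR_PTR. The pattern the new rename code (and symlink, below) follows is simply:

	/* nr_items: worst-case number of tree items this transaction may touch */
	trans = btrfs_start_transaction(root, nr_items);
	if (IS_ERR(trans))
		return PTR_ERR(trans);
	/* ... modify the tree ... */
	btrfs_end_transaction(trans, root);

Because the reservation is dropped as part of ending the transaction, the explicit unreserve in the out_fail path above is no longer needed.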
5623 6563
@@ -5669,6 +6609,38 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
5669 return 0; 6609 return 0;
5670} 6610}
5671 6611
6612int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput)
6613{
6614 struct btrfs_inode *binode;
6615 struct inode *inode = NULL;
6616
6617 spin_lock(&root->fs_info->delalloc_lock);
6618 while (!list_empty(&root->fs_info->delalloc_inodes)) {
6619 binode = list_entry(root->fs_info->delalloc_inodes.next,
6620 struct btrfs_inode, delalloc_inodes);
6621 inode = igrab(&binode->vfs_inode);
6622 if (inode) {
6623 list_move_tail(&binode->delalloc_inodes,
6624 &root->fs_info->delalloc_inodes);
6625 break;
6626 }
6627
6628 list_del_init(&binode->delalloc_inodes);
6629 cond_resched_lock(&root->fs_info->delalloc_lock);
6630 }
6631 spin_unlock(&root->fs_info->delalloc_lock);
6632
6633 if (inode) {
6634 write_inode_now(inode, 0);
6635 if (delay_iput)
6636 btrfs_add_delayed_iput(inode);
6637 else
6638 iput(inode);
6639 return 1;
6640 }
6641 return 0;
6642}
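The new helper pulls one inode off fs_info->delalloc_inodes, starts writeback on it (write_inode_now() with sync == 0 does not wait for completion), and returns 1 if it found work to do, 0 once the list is drained. Inodes that cannot be pinned with igrab() are dropped from the list, and the one that is flushed is moved to the tail so repeated calls rotate through the list. A purely illustrative caller, where have_enough_space() is a stand-in and not a real btrfs function:

	/* flush delalloc one inode at a time until enough space is reclaimed */
	while (!have_enough_space(root)) {
		if (!btrfs_start_one_delalloc_inode(root, 0))
			break;	/* nothing left to flush */
	}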
6643
5672static int btrfs_symlink(struct inode *dir, struct dentry *dentry, 6644static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
5673 const char *symname) 6645 const char *symname)
5674{ 6646{
@@ -5692,26 +6664,20 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
5692 if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root)) 6664 if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root))
5693 return -ENAMETOOLONG; 6665 return -ENAMETOOLONG;
5694 6666
6667 err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid);
6668 if (err)
6669 return err;
5695 /* 6670 /*
5696 * 2 items for inode item and ref 6671 * 2 items for inode item and ref
5697 * 2 items for dir items 6672 * 2 items for dir items
5698 * 1 item for xattr if selinux is on 6673 * 1 item for xattr if selinux is on
5699 */ 6674 */
5700 err = btrfs_reserve_metadata_space(root, 5); 6675 trans = btrfs_start_transaction(root, 5);
5701 if (err) 6676 if (IS_ERR(trans))
5702 return err; 6677 return PTR_ERR(trans);
5703 6678
5704 trans = btrfs_start_transaction(root, 1);
5705 if (!trans)
5706 goto out_fail;
5707 btrfs_set_trans_block_group(trans, dir); 6679 btrfs_set_trans_block_group(trans, dir);
5708 6680
5709 err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
5710 if (err) {
5711 err = -ENOSPC;
5712 goto out_unlock;
5713 }
5714
5715 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, 6681 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
5716 dentry->d_name.len, 6682 dentry->d_name.len,
5717 dentry->d_parent->d_inode->i_ino, objectid, 6683 dentry->d_parent->d_inode->i_ino, objectid,
@@ -5783,8 +6749,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
5783out_unlock: 6749out_unlock:
5784 nr = trans->blocks_used; 6750 nr = trans->blocks_used;
5785 btrfs_end_transaction_throttle(trans, root); 6751 btrfs_end_transaction_throttle(trans, root);
5786out_fail:
5787 btrfs_unreserve_metadata_space(root, 5);
5788 if (drop_inode) { 6752 if (drop_inode) {
5789 inode_dec_link_count(inode); 6753 inode_dec_link_count(inode);
5790 iput(inode); 6754 iput(inode);
@@ -5793,36 +6757,28 @@ out_fail:
5793 return err; 6757 return err;
5794} 6758}
5795 6759
5796static int prealloc_file_range(struct inode *inode, u64 start, u64 end, 6760int btrfs_prealloc_file_range(struct inode *inode, int mode,
5797 u64 alloc_hint, int mode, loff_t actual_len) 6761 u64 start, u64 num_bytes, u64 min_size,
6762 loff_t actual_len, u64 *alloc_hint)
5798{ 6763{
5799 struct btrfs_trans_handle *trans; 6764 struct btrfs_trans_handle *trans;
5800 struct btrfs_root *root = BTRFS_I(inode)->root; 6765 struct btrfs_root *root = BTRFS_I(inode)->root;
5801 struct btrfs_key ins; 6766 struct btrfs_key ins;
5802 u64 alloc_size;
5803 u64 cur_offset = start; 6767 u64 cur_offset = start;
5804 u64 num_bytes = end - start;
5805 int ret = 0; 6768 int ret = 0;
5806 u64 i_size;
5807 6769
5808 while (num_bytes > 0) { 6770 while (num_bytes > 0) {
5809 alloc_size = min(num_bytes, root->fs_info->max_extent); 6771 trans = btrfs_start_transaction(root, 3);
5810 6772 if (IS_ERR(trans)) {
5811 trans = btrfs_start_transaction(root, 1); 6773 ret = PTR_ERR(trans);
5812 6774 break;
5813 ret = btrfs_reserve_extent(trans, root, alloc_size,
5814 root->sectorsize, 0, alloc_hint,
5815 (u64)-1, &ins, 1);
5816 if (ret) {
5817 WARN_ON(1);
5818 goto stop_trans;
5819 } 6775 }
5820 6776
5821 ret = btrfs_reserve_metadata_space(root, 3); 6777 ret = btrfs_reserve_extent(trans, root, num_bytes, min_size,
6778 0, *alloc_hint, (u64)-1, &ins, 1);
5822 if (ret) { 6779 if (ret) {
5823 btrfs_free_reserved_extent(root, ins.objectid, 6780 btrfs_end_transaction(trans, root);
5824 ins.offset); 6781 break;
5825 goto stop_trans;
5826 } 6782 }
5827 6783
5828 ret = insert_reserved_file_extent(trans, inode, 6784 ret = insert_reserved_file_extent(trans, inode,
@@ -5836,37 +6792,33 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end,
5836 6792
5837 num_bytes -= ins.offset; 6793 num_bytes -= ins.offset;
5838 cur_offset += ins.offset; 6794 cur_offset += ins.offset;
5839 alloc_hint = ins.objectid + ins.offset; 6795 *alloc_hint = ins.objectid + ins.offset;
5840 6796
5841 inode->i_ctime = CURRENT_TIME; 6797 inode->i_ctime = CURRENT_TIME;
5842 BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC; 6798 BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC;
5843 if (!(mode & FALLOC_FL_KEEP_SIZE) && 6799 if (!(mode & FALLOC_FL_KEEP_SIZE) &&
5844 cur_offset > inode->i_size) { 6800 (actual_len > inode->i_size) &&
6801 (cur_offset > inode->i_size)) {
5845 if (cur_offset > actual_len) 6802 if (cur_offset > actual_len)
5846 i_size = actual_len; 6803 i_size_write(inode, actual_len);
5847 else 6804 else
5848 i_size = cur_offset; 6805 i_size_write(inode, cur_offset);
5849 i_size_write(inode, i_size); 6806 i_size_write(inode, cur_offset);
5850 btrfs_ordered_update_i_size(inode, i_size, NULL); 6807 btrfs_ordered_update_i_size(inode, cur_offset, NULL);
5851 } 6808 }
5852 6809
5853 ret = btrfs_update_inode(trans, root, inode); 6810 ret = btrfs_update_inode(trans, root, inode);
5854 BUG_ON(ret); 6811 BUG_ON(ret);
5855 6812
5856 btrfs_end_transaction(trans, root); 6813 btrfs_end_transaction(trans, root);
5857 btrfs_unreserve_metadata_space(root, 3);
5858 } 6814 }
5859 return ret; 6815 return ret;
5860
5861stop_trans:
5862 btrfs_end_transaction(trans, root);
5863 return ret;
5864
5865} 6816}
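The rewritten helper is exported as btrfs_prealloc_file_range(): the caller passes a byte count and a minimum extent size instead of an end offset, supplies the allocation hint by pointer so it carries across calls, and each loop iteration opens its own 3-item transaction instead of reserving metadata separately. An illustrative call, with all values made up:

	u64 alloc_hint = 0;
	int err;

	/* preallocate 16 MiB at offset 0, minimum extent size of one sector */
	err = btrfs_prealloc_file_range(inode, 0, 0, 16 * 1024 * 1024,
					BTRFS_I(inode)->root->sectorsize,
					16 * 1024 * 1024, &alloc_hint);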
5866 6817
5867static long btrfs_fallocate(struct inode *inode, int mode, 6818static long btrfs_fallocate(struct inode *inode, int mode,
5868 loff_t offset, loff_t len) 6819 loff_t offset, loff_t len)
5869{ 6820{
6821 struct extent_state *cached_state = NULL;
5870 u64 cur_offset; 6822 u64 cur_offset;
5871 u64 last_byte; 6823 u64 last_byte;
5872 u64 alloc_start; 6824 u64 alloc_start;
@@ -5893,8 +6845,7 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5893 goto out; 6845 goto out;
5894 } 6846 }
5895 6847
5896 ret = btrfs_check_data_free_space(BTRFS_I(inode)->root, inode, 6848 ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start);
5897 alloc_end - alloc_start);
5898 if (ret) 6849 if (ret)
5899 goto out; 6850 goto out;
5900 6851
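Both data-space helpers lose the root argument here; reservation and release are keyed off the inode alone. The shape used by fallocate above and below is:

	ret = btrfs_check_data_free_space(inode, len);
	if (ret)
		goto out;	/* not enough free data space */
	/* ... allocate or write the range ... */
	btrfs_free_reserved_data_space(inode, len);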
@@ -5905,16 +6856,17 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5905 /* the extent lock is ordered inside the running 6856 /* the extent lock is ordered inside the running
5906 * transaction 6857 * transaction
5907 */ 6858 */
5908 lock_extent(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, 6859 lock_extent_bits(&BTRFS_I(inode)->io_tree, alloc_start,
5909 GFP_NOFS); 6860 locked_end, 0, &cached_state, GFP_NOFS);
5910 ordered = btrfs_lookup_first_ordered_extent(inode, 6861 ordered = btrfs_lookup_first_ordered_extent(inode,
5911 alloc_end - 1); 6862 alloc_end - 1);
5912 if (ordered && 6863 if (ordered &&
5913 ordered->file_offset + ordered->len > alloc_start && 6864 ordered->file_offset + ordered->len > alloc_start &&
5914 ordered->file_offset < alloc_end) { 6865 ordered->file_offset < alloc_end) {
5915 btrfs_put_ordered_extent(ordered); 6866 btrfs_put_ordered_extent(ordered);
5916 unlock_extent(&BTRFS_I(inode)->io_tree, 6867 unlock_extent_cached(&BTRFS_I(inode)->io_tree,
5917 alloc_start, locked_end, GFP_NOFS); 6868 alloc_start, locked_end,
6869 &cached_state, GFP_NOFS);
5918 /* 6870 /*
5919 * we can't wait on the range with the transaction 6871 * we can't wait on the range with the transaction
5920 * running or with the extent lock held 6872 * running or with the extent lock held
@@ -5938,16 +6890,16 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5938 if (em->block_start == EXTENT_MAP_HOLE || 6890 if (em->block_start == EXTENT_MAP_HOLE ||
5939 (cur_offset >= inode->i_size && 6891 (cur_offset >= inode->i_size &&
5940 !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { 6892 !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
5941 ret = prealloc_file_range(inode, 6893 ret = btrfs_prealloc_file_range(inode, mode, cur_offset,
5942 cur_offset, last_byte, 6894 last_byte - cur_offset,
5943 alloc_hint, mode, offset+len); 6895 1 << inode->i_blkbits,
6896 offset + len,
6897 &alloc_hint);
5944 if (ret < 0) { 6898 if (ret < 0) {
5945 free_extent_map(em); 6899 free_extent_map(em);
5946 break; 6900 break;
5947 } 6901 }
5948 } 6902 }
5949 if (em->block_start <= EXTENT_MAP_LAST_BYTE)
5950 alloc_hint = em->block_start;
5951 free_extent_map(em); 6903 free_extent_map(em);
5952 6904
5953 cur_offset = last_byte; 6905 cur_offset = last_byte;
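The surrounding fallocate hunks switch the range locking to the cached-state variants: lock_extent_bits() remembers the extent_state it locked and unlock_extent_cached() hands it back, so the io_tree does not have to be searched a second time on unlock. Pulled together in one place, the pattern is:

	struct extent_state *cached_state = NULL;

	lock_extent_bits(&BTRFS_I(inode)->io_tree, start, end, 0,
			 &cached_state, GFP_NOFS);
	/* ... operate on [start, end] with the range locked ... */
	unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, end,
			     &cached_state, GFP_NOFS);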
@@ -5956,11 +6908,10 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5956 break; 6908 break;
5957 } 6909 }
5958 } 6910 }
5959 unlock_extent(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, 6911 unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
5960 GFP_NOFS); 6912 &cached_state, GFP_NOFS);
5961 6913
5962 btrfs_free_reserved_data_space(BTRFS_I(inode)->root, inode, 6914 btrfs_free_reserved_data_space(inode, alloc_end - alloc_start);
5963 alloc_end - alloc_start);
5964out: 6915out:
5965 mutex_unlock(&inode->i_mutex); 6916 mutex_unlock(&inode->i_mutex);
5966 return ret; 6917 return ret;