about | summary | refs | log | tree | commit | diff | stats
path: root/fs/btrfs/inode.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r-- fs/btrfs/inode.c 277
1 files changed, 201 insertions, 76 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 8f0706210a47..a0d1dd492a58 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -102,34 +102,6 @@ static int btrfs_init_inode_security(struct inode *inode, struct inode *dir)
102} 102}
103 103
104/* 104/*
105 * a very lame attempt at stopping writes when the FS is 85% full. There
106 * are countless ways this is incorrect, but it is better than nothing.
107 */
108int btrfs_check_free_space(struct btrfs_root *root, u64 num_required,
109 int for_del)
110{
111 u64 total;
112 u64 used;
113 u64 thresh;
114 int ret = 0;
115
116 spin_lock(&root->fs_info->delalloc_lock);
117 total = btrfs_super_total_bytes(&root->fs_info->super_copy);
118 used = btrfs_super_bytes_used(&root->fs_info->super_copy);
119 if (for_del)
120 thresh = total * 90;
121 else
122 thresh = total * 85;
123
124 do_div(thresh, 100);
125
126 if (used + root->fs_info->delalloc_bytes + num_required > thresh)
127 ret = -ENOSPC;
128 spin_unlock(&root->fs_info->delalloc_lock);
129 return ret;
130}
131
132/*
133 * this does all the hard work for inserting an inline extent into 105 * this does all the hard work for inserting an inline extent into
134 * the btree. The caller should have done a btrfs_drop_extents so that 106 * the btree. The caller should have done a btrfs_drop_extents so that
135 * no overlapping inline items exist in the btree 107 * no overlapping inline items exist in the btree
@@ -162,6 +134,7 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
162 if (!path) 134 if (!path)
163 return -ENOMEM; 135 return -ENOMEM;
164 136
137 path->leave_spinning = 1;
165 btrfs_set_trans_block_group(trans, inode); 138 btrfs_set_trans_block_group(trans, inode);
166 139
167 key.objectid = inode->i_ino; 140 key.objectid = inode->i_ino;
@@ -195,9 +168,9 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
195 cur_size = min_t(unsigned long, compressed_size, 168 cur_size = min_t(unsigned long, compressed_size,
196 PAGE_CACHE_SIZE); 169 PAGE_CACHE_SIZE);
197 170
198 kaddr = kmap(cpage); 171 kaddr = kmap_atomic(cpage, KM_USER0);
199 write_extent_buffer(leaf, kaddr, ptr, cur_size); 172 write_extent_buffer(leaf, kaddr, ptr, cur_size);
200 kunmap(cpage); 173 kunmap_atomic(kaddr, KM_USER0);
201 174
202 i++; 175 i++;
203 ptr += cur_size; 176 ptr += cur_size;
@@ -232,7 +205,7 @@ fail:
232 * does the checks required to make sure the data is small enough 205 * does the checks required to make sure the data is small enough
233 * to fit as an inline extent. 206 * to fit as an inline extent.
234 */ 207 */
235static int cow_file_range_inline(struct btrfs_trans_handle *trans, 208static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
236 struct btrfs_root *root, 209 struct btrfs_root *root,
237 struct inode *inode, u64 start, u64 end, 210 struct inode *inode, u64 start, u64 end,
238 size_t compressed_size, 211 size_t compressed_size,
@@ -882,11 +855,6 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
882 u64 cur_end; 855 u64 cur_end;
883 int limit = 10 * 1024 * 1042; 856 int limit = 10 * 1024 * 1042;
884 857
885 if (!btrfs_test_opt(root, COMPRESS)) {
886 return cow_file_range(inode, locked_page, start, end,
887 page_started, nr_written, 1);
888 }
889
890 clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED | 858 clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED |
891 EXTENT_DELALLOC, 1, 0, GFP_NOFS); 859 EXTENT_DELALLOC, 1, 0, GFP_NOFS);
892 while (start < end) { 860 while (start < end) {
@@ -963,7 +931,8 @@ static noinline int csum_exist_in_range(struct btrfs_root *root,
963 * If no cow copies or snapshots exist, we write directly to the existing 931 * If no cow copies or snapshots exist, we write directly to the existing
964 * blocks on disk 932 * blocks on disk
965 */ 933 */
966static int run_delalloc_nocow(struct inode *inode, struct page *locked_page, 934static noinline int run_delalloc_nocow(struct inode *inode,
935 struct page *locked_page,
967 u64 start, u64 end, int *page_started, int force, 936 u64 start, u64 end, int *page_started, int force,
968 unsigned long *nr_written) 937 unsigned long *nr_written)
969{ 938{
@@ -1161,6 +1130,7 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page,
1161 unsigned long *nr_written) 1130 unsigned long *nr_written)
1162{ 1131{
1163 int ret; 1132 int ret;
1133 struct btrfs_root *root = BTRFS_I(inode)->root;
1164 1134
1165 if (btrfs_test_flag(inode, NODATACOW)) 1135 if (btrfs_test_flag(inode, NODATACOW))
1166 ret = run_delalloc_nocow(inode, locked_page, start, end, 1136 ret = run_delalloc_nocow(inode, locked_page, start, end,
@@ -1168,10 +1138,12 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page,
1168 else if (btrfs_test_flag(inode, PREALLOC)) 1138 else if (btrfs_test_flag(inode, PREALLOC))
1169 ret = run_delalloc_nocow(inode, locked_page, start, end, 1139 ret = run_delalloc_nocow(inode, locked_page, start, end,
1170 page_started, 0, nr_written); 1140 page_started, 0, nr_written);
1141 else if (!btrfs_test_opt(root, COMPRESS))
1142 ret = cow_file_range(inode, locked_page, start, end,
1143 page_started, nr_written, 1);
1171 else 1144 else
1172 ret = cow_file_range_async(inode, locked_page, start, end, 1145 ret = cow_file_range_async(inode, locked_page, start, end,
1173 page_started, nr_written); 1146 page_started, nr_written);
1174
1175 return ret; 1147 return ret;
1176} 1148}
1177 1149
@@ -1190,6 +1162,7 @@ static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end,
1190 */ 1162 */
1191 if (!(old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { 1163 if (!(old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) {
1192 struct btrfs_root *root = BTRFS_I(inode)->root; 1164 struct btrfs_root *root = BTRFS_I(inode)->root;
1165 btrfs_delalloc_reserve_space(root, inode, end - start + 1);
1193 spin_lock(&root->fs_info->delalloc_lock); 1166 spin_lock(&root->fs_info->delalloc_lock);
1194 BTRFS_I(inode)->delalloc_bytes += end - start + 1; 1167 BTRFS_I(inode)->delalloc_bytes += end - start + 1;
1195 root->fs_info->delalloc_bytes += end - start + 1; 1168 root->fs_info->delalloc_bytes += end - start + 1;
@@ -1223,9 +1196,12 @@ static int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end,
1223 (unsigned long long)end - start + 1, 1196 (unsigned long long)end - start + 1,
1224 (unsigned long long) 1197 (unsigned long long)
1225 root->fs_info->delalloc_bytes); 1198 root->fs_info->delalloc_bytes);
1199 btrfs_delalloc_free_space(root, inode, (u64)-1);
1226 root->fs_info->delalloc_bytes = 0; 1200 root->fs_info->delalloc_bytes = 0;
1227 BTRFS_I(inode)->delalloc_bytes = 0; 1201 BTRFS_I(inode)->delalloc_bytes = 0;
1228 } else { 1202 } else {
1203 btrfs_delalloc_free_space(root, inode,
1204 end - start + 1);
1229 root->fs_info->delalloc_bytes -= end - start + 1; 1205 root->fs_info->delalloc_bytes -= end - start + 1;
1230 BTRFS_I(inode)->delalloc_bytes -= end - start + 1; 1206 BTRFS_I(inode)->delalloc_bytes -= end - start + 1;
1231 } 1207 }
@@ -1477,6 +1453,7 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1477 path = btrfs_alloc_path(); 1453 path = btrfs_alloc_path();
1478 BUG_ON(!path); 1454 BUG_ON(!path);
1479 1455
1456 path->leave_spinning = 1;
1480 ret = btrfs_drop_extents(trans, root, inode, file_pos, 1457 ret = btrfs_drop_extents(trans, root, inode, file_pos,
1481 file_pos + num_bytes, file_pos, &hint); 1458 file_pos + num_bytes, file_pos, &hint);
1482 BUG_ON(ret); 1459 BUG_ON(ret);
@@ -1499,6 +1476,10 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1499 btrfs_set_file_extent_compression(leaf, fi, compression); 1476 btrfs_set_file_extent_compression(leaf, fi, compression);
1500 btrfs_set_file_extent_encryption(leaf, fi, encryption); 1477 btrfs_set_file_extent_encryption(leaf, fi, encryption);
1501 btrfs_set_file_extent_other_encoding(leaf, fi, other_encoding); 1478 btrfs_set_file_extent_other_encoding(leaf, fi, other_encoding);
1479
1480 btrfs_unlock_up_safe(path, 1);
1481 btrfs_set_lock_blocking(leaf);
1482
1502 btrfs_mark_buffer_dirty(leaf); 1483 btrfs_mark_buffer_dirty(leaf);
1503 1484
1504 inode_add_bytes(inode, num_bytes); 1485 inode_add_bytes(inode, num_bytes);
@@ -1511,11 +1492,35 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1511 root->root_key.objectid, 1492 root->root_key.objectid,
1512 trans->transid, inode->i_ino, &ins); 1493 trans->transid, inode->i_ino, &ins);
1513 BUG_ON(ret); 1494 BUG_ON(ret);
1514
1515 btrfs_free_path(path); 1495 btrfs_free_path(path);
1496
1516 return 0; 1497 return 0;
1517} 1498}
1518 1499
1500/*
1501 * helper function for btrfs_finish_ordered_io, this
1502 * just reads in some of the csum leaves to prime them into ram
1503 * before we start the transaction. It limits the amount of btree
1504 * reads required while inside the transaction.
1505 */
1506static noinline void reada_csum(struct btrfs_root *root,
1507 struct btrfs_path *path,
1508 struct btrfs_ordered_extent *ordered_extent)
1509{
1510 struct btrfs_ordered_sum *sum;
1511 u64 bytenr;
1512
1513 sum = list_entry(ordered_extent->list.next, struct btrfs_ordered_sum,
1514 list);
1515 bytenr = sum->sums[0].bytenr;
1516
1517 /*
1518 * we don't care about the results, the point of this search is
1519 * just to get the btree leaves into ram
1520 */
1521 btrfs_lookup_csum(NULL, root->fs_info->csum_root, path, bytenr, 0);
1522}
1523
1519/* as ordered data IO finishes, this gets called so we can finish 1524/* as ordered data IO finishes, this gets called so we can finish
1520 * an ordered extent if the range of bytes in the file it covers are 1525 * an ordered extent if the range of bytes in the file it covers are
1521 * fully written. 1526 * fully written.
@@ -1524,8 +1529,9 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1524{ 1529{
1525 struct btrfs_root *root = BTRFS_I(inode)->root; 1530 struct btrfs_root *root = BTRFS_I(inode)->root;
1526 struct btrfs_trans_handle *trans; 1531 struct btrfs_trans_handle *trans;
1527 struct btrfs_ordered_extent *ordered_extent; 1532 struct btrfs_ordered_extent *ordered_extent = NULL;
1528 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 1533 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
1534 struct btrfs_path *path;
1529 int compressed = 0; 1535 int compressed = 0;
1530 int ret; 1536 int ret;
1531 1537
@@ -1533,9 +1539,33 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1533 if (!ret) 1539 if (!ret)
1534 return 0; 1540 return 0;
1535 1541
1542 /*
1543 * before we join the transaction, try to do some of our IO.
1544 * This will limit the amount of IO that we have to do with
1545 * the transaction running. We're unlikely to need to do any
1546 * IO if the file extents are new, the disk_i_size check
1547 * covers the most common case.
1548 */
1549 if (start < BTRFS_I(inode)->disk_i_size) {
1550 path = btrfs_alloc_path();
1551 if (path) {
1552 ret = btrfs_lookup_file_extent(NULL, root, path,
1553 inode->i_ino,
1554 start, 0);
1555 ordered_extent = btrfs_lookup_ordered_extent(inode,
1556 start);
1557 if (!list_empty(&ordered_extent->list)) {
1558 btrfs_release_path(root, path);
1559 reada_csum(root, path, ordered_extent);
1560 }
1561 btrfs_free_path(path);
1562 }
1563 }
1564
1536 trans = btrfs_join_transaction(root, 1); 1565 trans = btrfs_join_transaction(root, 1);
1537 1566
1538 ordered_extent = btrfs_lookup_ordered_extent(inode, start); 1567 if (!ordered_extent)
1568 ordered_extent = btrfs_lookup_ordered_extent(inode, start);
1539 BUG_ON(!ordered_extent); 1569 BUG_ON(!ordered_extent);
1540 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) 1570 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags))
1541 goto nocow; 1571 goto nocow;
@@ -2125,6 +2155,7 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans,
2125 2155
2126 path = btrfs_alloc_path(); 2156 path = btrfs_alloc_path();
2127 BUG_ON(!path); 2157 BUG_ON(!path);
2158 path->leave_spinning = 1;
2128 ret = btrfs_lookup_inode(trans, root, path, 2159 ret = btrfs_lookup_inode(trans, root, path,
2129 &BTRFS_I(inode)->location, 1); 2160 &BTRFS_I(inode)->location, 1);
2130 if (ret) { 2161 if (ret) {
@@ -2171,6 +2202,7 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
2171 goto err; 2202 goto err;
2172 } 2203 }
2173 2204
2205 path->leave_spinning = 1;
2174 di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, 2206 di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino,
2175 name, name_len, -1); 2207 name, name_len, -1);
2176 if (IS_ERR(di)) { 2208 if (IS_ERR(di)) {
@@ -2214,8 +2246,6 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
2214 ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len, 2246 ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len,
2215 inode, dir->i_ino); 2247 inode, dir->i_ino);
2216 BUG_ON(ret != 0 && ret != -ENOENT); 2248 BUG_ON(ret != 0 && ret != -ENOENT);
2217 if (ret != -ENOENT)
2218 BTRFS_I(dir)->log_dirty_trans = trans->transid;
2219 2249
2220 ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len, 2250 ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len,
2221 dir, index); 2251 dir, index);
@@ -2245,13 +2275,12 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
2245 2275
2246 root = BTRFS_I(dir)->root; 2276 root = BTRFS_I(dir)->root;
2247 2277
2248 ret = btrfs_check_free_space(root, 1, 1);
2249 if (ret)
2250 goto fail;
2251
2252 trans = btrfs_start_transaction(root, 1); 2278 trans = btrfs_start_transaction(root, 1);
2253 2279
2254 btrfs_set_trans_block_group(trans, dir); 2280 btrfs_set_trans_block_group(trans, dir);
2281
2282 btrfs_record_unlink_dir(trans, dir, dentry->d_inode, 0);
2283
2255 ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, 2284 ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode,
2256 dentry->d_name.name, dentry->d_name.len); 2285 dentry->d_name.name, dentry->d_name.len);
2257 2286
@@ -2261,7 +2290,6 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
2261 nr = trans->blocks_used; 2290 nr = trans->blocks_used;
2262 2291
2263 btrfs_end_transaction_throttle(trans, root); 2292 btrfs_end_transaction_throttle(trans, root);
2264fail:
2265 btrfs_btree_balance_dirty(root, nr); 2293 btrfs_btree_balance_dirty(root, nr);
2266 return ret; 2294 return ret;
2267} 2295}
@@ -2284,10 +2312,6 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
2284 return -ENOTEMPTY; 2312 return -ENOTEMPTY;
2285 } 2313 }
2286 2314
2287 ret = btrfs_check_free_space(root, 1, 1);
2288 if (ret)
2289 goto fail;
2290
2291 trans = btrfs_start_transaction(root, 1); 2315 trans = btrfs_start_transaction(root, 1);
2292 btrfs_set_trans_block_group(trans, dir); 2316 btrfs_set_trans_block_group(trans, dir);
2293 2317
@@ -2304,7 +2328,6 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
2304fail_trans: 2328fail_trans:
2305 nr = trans->blocks_used; 2329 nr = trans->blocks_used;
2306 ret = btrfs_end_transaction_throttle(trans, root); 2330 ret = btrfs_end_transaction_throttle(trans, root);
2307fail:
2308 btrfs_btree_balance_dirty(root, nr); 2331 btrfs_btree_balance_dirty(root, nr);
2309 2332
2310 if (ret && !err) 2333 if (ret && !err)
@@ -2531,9 +2554,8 @@ noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
2531 key.offset = (u64)-1; 2554 key.offset = (u64)-1;
2532 key.type = (u8)-1; 2555 key.type = (u8)-1;
2533 2556
2534 btrfs_init_path(path);
2535
2536search_again: 2557search_again:
2558 path->leave_spinning = 1;
2537 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 2559 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2538 if (ret < 0) 2560 if (ret < 0)
2539 goto error; 2561 goto error;
@@ -2680,6 +2702,7 @@ delete:
2680 break; 2702 break;
2681 } 2703 }
2682 if (found_extent) { 2704 if (found_extent) {
2705 btrfs_set_path_blocking(path);
2683 ret = btrfs_free_extent(trans, root, extent_start, 2706 ret = btrfs_free_extent(trans, root, extent_start,
2684 extent_num_bytes, 2707 extent_num_bytes,
2685 leaf->start, root_owner, 2708 leaf->start, root_owner,
@@ -2820,7 +2843,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
2820 if (size <= hole_start) 2843 if (size <= hole_start)
2821 return 0; 2844 return 0;
2822 2845
2823 err = btrfs_check_free_space(root, 1, 0); 2846 err = btrfs_check_metadata_free_space(root);
2824 if (err) 2847 if (err)
2825 return err; 2848 return err;
2826 2849
@@ -2884,11 +2907,21 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
2884 if (err) 2907 if (err)
2885 return err; 2908 return err;
2886 2909
2887 if (S_ISREG(inode->i_mode) && 2910 if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
2888 attr->ia_valid & ATTR_SIZE && attr->ia_size > inode->i_size) { 2911 if (attr->ia_size > inode->i_size) {
2889 err = btrfs_cont_expand(inode, attr->ia_size); 2912 err = btrfs_cont_expand(inode, attr->ia_size);
2890 if (err) 2913 if (err)
2891 return err; 2914 return err;
2915 } else if (inode->i_size > 0 &&
2916 attr->ia_size == 0) {
2917
2918 /* we're truncating a file that used to have good
2919 * data down to zero. Make sure it gets into
2920 * the ordered flush list so that any new writes
2921 * get down to disk quickly.
2922 */
2923 BTRFS_I(inode)->ordered_data_close = 1;
2924 }
2892 } 2925 }
2893 2926
2894 err = inode_setattr(inode, attr); 2927 err = inode_setattr(inode, attr);
@@ -3016,16 +3049,18 @@ static noinline void init_btrfs_i(struct inode *inode)
3016 bi->last_trans = 0; 3049 bi->last_trans = 0;
3017 bi->logged_trans = 0; 3050 bi->logged_trans = 0;
3018 bi->delalloc_bytes = 0; 3051 bi->delalloc_bytes = 0;
3052 bi->reserved_bytes = 0;
3019 bi->disk_i_size = 0; 3053 bi->disk_i_size = 0;
3020 bi->flags = 0; 3054 bi->flags = 0;
3021 bi->index_cnt = (u64)-1; 3055 bi->index_cnt = (u64)-1;
3022 bi->log_dirty_trans = 0; 3056 bi->last_unlink_trans = 0;
3023 extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); 3057 extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS);
3024 extent_io_tree_init(&BTRFS_I(inode)->io_tree, 3058 extent_io_tree_init(&BTRFS_I(inode)->io_tree,
3025 inode->i_mapping, GFP_NOFS); 3059 inode->i_mapping, GFP_NOFS);
3026 extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree, 3060 extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree,
3027 inode->i_mapping, GFP_NOFS); 3061 inode->i_mapping, GFP_NOFS);
3028 INIT_LIST_HEAD(&BTRFS_I(inode)->delalloc_inodes); 3062 INIT_LIST_HEAD(&BTRFS_I(inode)->delalloc_inodes);
3063 INIT_LIST_HEAD(&BTRFS_I(inode)->ordered_operations);
3029 btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree); 3064 btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree);
3030 mutex_init(&BTRFS_I(inode)->extent_mutex); 3065 mutex_init(&BTRFS_I(inode)->extent_mutex);
3031 mutex_init(&BTRFS_I(inode)->log_mutex); 3066 mutex_init(&BTRFS_I(inode)->log_mutex);
@@ -3037,6 +3072,7 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p)
3037 inode->i_ino = args->ino; 3072 inode->i_ino = args->ino;
3038 init_btrfs_i(inode); 3073 init_btrfs_i(inode);
3039 BTRFS_I(inode)->root = args->root; 3074 BTRFS_I(inode)->root = args->root;
3075 btrfs_set_inode_space_info(args->root, inode);
3040 return 0; 3076 return 0;
3041} 3077}
3042 3078
@@ -3445,8 +3481,10 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
3445 3481
3446 if (dir) { 3482 if (dir) {
3447 ret = btrfs_set_inode_index(dir, index); 3483 ret = btrfs_set_inode_index(dir, index);
3448 if (ret) 3484 if (ret) {
3485 iput(inode);
3449 return ERR_PTR(ret); 3486 return ERR_PTR(ret);
3487 }
3450 } 3488 }
3451 /* 3489 /*
3452 * index_cnt is ignored for everything but a dir, 3490 * index_cnt is ignored for everything but a dir,
@@ -3457,6 +3495,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
3457 BTRFS_I(inode)->index_cnt = 2; 3495 BTRFS_I(inode)->index_cnt = 2;
3458 BTRFS_I(inode)->root = root; 3496 BTRFS_I(inode)->root = root;
3459 BTRFS_I(inode)->generation = trans->transid; 3497 BTRFS_I(inode)->generation = trans->transid;
3498 btrfs_set_inode_space_info(root, inode);
3460 3499
3461 if (mode & S_IFDIR) 3500 if (mode & S_IFDIR)
3462 owner = 0; 3501 owner = 0;
@@ -3482,6 +3521,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
3482 sizes[0] = sizeof(struct btrfs_inode_item); 3521 sizes[0] = sizeof(struct btrfs_inode_item);
3483 sizes[1] = name_len + sizeof(*ref); 3522 sizes[1] = name_len + sizeof(*ref);
3484 3523
3524 path->leave_spinning = 1;
3485 ret = btrfs_insert_empty_items(trans, root, path, key, sizes, 2); 3525 ret = btrfs_insert_empty_items(trans, root, path, key, sizes, 2);
3486 if (ret != 0) 3526 if (ret != 0)
3487 goto fail; 3527 goto fail;
@@ -3527,6 +3567,7 @@ fail:
3527 if (dir) 3567 if (dir)
3528 BTRFS_I(dir)->index_cnt--; 3568 BTRFS_I(dir)->index_cnt--;
3529 btrfs_free_path(path); 3569 btrfs_free_path(path);
3570 iput(inode);
3530 return ERR_PTR(ret); 3571 return ERR_PTR(ret);
3531} 3572}
3532 3573
@@ -3604,7 +3645,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
3604 if (!new_valid_dev(rdev)) 3645 if (!new_valid_dev(rdev))
3605 return -EINVAL; 3646 return -EINVAL;
3606 3647
3607 err = btrfs_check_free_space(root, 1, 0); 3648 err = btrfs_check_metadata_free_space(root);
3608 if (err) 3649 if (err)
3609 goto fail; 3650 goto fail;
3610 3651
@@ -3667,7 +3708,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
3667 u64 objectid; 3708 u64 objectid;
3668 u64 index = 0; 3709 u64 index = 0;
3669 3710
3670 err = btrfs_check_free_space(root, 1, 0); 3711 err = btrfs_check_metadata_free_space(root);
3671 if (err) 3712 if (err)
3672 goto fail; 3713 goto fail;
3673 trans = btrfs_start_transaction(root, 1); 3714 trans = btrfs_start_transaction(root, 1);
@@ -3735,7 +3776,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
3735 return -ENOENT; 3776 return -ENOENT;
3736 3777
3737 btrfs_inc_nlink(inode); 3778 btrfs_inc_nlink(inode);
3738 err = btrfs_check_free_space(root, 1, 0); 3779 err = btrfs_check_metadata_free_space(root);
3739 if (err) 3780 if (err)
3740 goto fail; 3781 goto fail;
3741 err = btrfs_set_inode_index(dir, &index); 3782 err = btrfs_set_inode_index(dir, &index);
@@ -3760,6 +3801,8 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
3760 drop_inode = 1; 3801 drop_inode = 1;
3761 3802
3762 nr = trans->blocks_used; 3803 nr = trans->blocks_used;
3804
3805 btrfs_log_new_name(trans, inode, NULL, dentry->d_parent);
3763 btrfs_end_transaction_throttle(trans, root); 3806 btrfs_end_transaction_throttle(trans, root);
3764fail: 3807fail:
3765 if (drop_inode) { 3808 if (drop_inode) {
@@ -3781,7 +3824,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
3781 u64 index = 0; 3824 u64 index = 0;
3782 unsigned long nr = 1; 3825 unsigned long nr = 1;
3783 3826
3784 err = btrfs_check_free_space(root, 1, 0); 3827 err = btrfs_check_metadata_free_space(root);
3785 if (err) 3828 if (err)
3786 goto out_unlock; 3829 goto out_unlock;
3787 3830
@@ -4263,7 +4306,7 @@ static int btrfs_releasepage(struct page *page, gfp_t gfp_flags)
4263{ 4306{
4264 if (PageWriteback(page) || PageDirty(page)) 4307 if (PageWriteback(page) || PageDirty(page))
4265 return 0; 4308 return 0;
4266 return __btrfs_releasepage(page, gfp_flags); 4309 return __btrfs_releasepage(page, gfp_flags & GFP_NOFS);
4267} 4310}
4268 4311
4269static void btrfs_invalidatepage(struct page *page, unsigned long offset) 4312static void btrfs_invalidatepage(struct page *page, unsigned long offset)
@@ -4325,8 +4368,9 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
4325 * beyond EOF, then the page is guaranteed safe against truncation until we 4368 * beyond EOF, then the page is guaranteed safe against truncation until we
4326 * unlock the page. 4369 * unlock the page.
4327 */ 4370 */
4328int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page) 4371int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
4329{ 4372{
4373 struct page *page = vmf->page;
4330 struct inode *inode = fdentry(vma->vm_file)->d_inode; 4374 struct inode *inode = fdentry(vma->vm_file)->d_inode;
4331 struct btrfs_root *root = BTRFS_I(inode)->root; 4375 struct btrfs_root *root = BTRFS_I(inode)->root;
4332 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 4376 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
@@ -4338,11 +4382,16 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page)
4338 u64 page_start; 4382 u64 page_start;
4339 u64 page_end; 4383 u64 page_end;
4340 4384
4341 ret = btrfs_check_free_space(root, PAGE_CACHE_SIZE, 0); 4385 ret = btrfs_check_data_free_space(root, inode, PAGE_CACHE_SIZE);
4342 if (ret) 4386 if (ret) {
4387 if (ret == -ENOMEM)
4388 ret = VM_FAULT_OOM;
4389 else /* -ENOSPC, -EIO, etc */
4390 ret = VM_FAULT_SIGBUS;
4343 goto out; 4391 goto out;
4392 }
4344 4393
4345 ret = -EINVAL; 4394 ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */
4346again: 4395again:
4347 lock_page(page); 4396 lock_page(page);
4348 size = i_size_read(inode); 4397 size = i_size_read(inode);
@@ -4351,6 +4400,7 @@ again:
4351 4400
4352 if ((page->mapping != inode->i_mapping) || 4401 if ((page->mapping != inode->i_mapping) ||
4353 (page_start >= size)) { 4402 (page_start >= size)) {
4403 btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE);
4354 /* page got truncated out from underneath us */ 4404 /* page got truncated out from underneath us */
4355 goto out_unlock; 4405 goto out_unlock;
4356 } 4406 }
@@ -4389,6 +4439,8 @@ again:
4389 } 4439 }
4390 ClearPageChecked(page); 4440 ClearPageChecked(page);
4391 set_page_dirty(page); 4441 set_page_dirty(page);
4442
4443 BTRFS_I(inode)->last_trans = root->fs_info->generation + 1;
4392 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 4444 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
4393 4445
4394out_unlock: 4446out_unlock:
@@ -4414,6 +4466,27 @@ static void btrfs_truncate(struct inode *inode)
4414 btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); 4466 btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1);
4415 4467
4416 trans = btrfs_start_transaction(root, 1); 4468 trans = btrfs_start_transaction(root, 1);
4469
4470 /*
4471 * setattr is responsible for setting the ordered_data_close flag,
4472 * but that is only tested during the last file release. That
4473 * could happen well after the next commit, leaving a great big
4474 * window where new writes may get lost if someone chooses to write
4475 * to this file after truncating to zero
4476 *
4477 * The inode doesn't have any dirty data here, and so if we commit
4478 * this is a noop. If someone immediately starts writing to the inode
4479 * it is very likely we'll catch some of their writes in this
4480 * transaction, and the commit will find this file on the ordered
4481 * data list with good things to send down.
4482 *
4483 * This is a best effort solution, there is still a window where
4484 * using truncate to replace the contents of the file will
4485 * end up with a zero length file after a crash.
4486 */
4487 if (inode->i_size == 0 && BTRFS_I(inode)->ordered_data_close)
4488 btrfs_add_ordered_operation(trans, root, inode);
4489
4417 btrfs_set_trans_block_group(trans, inode); 4490 btrfs_set_trans_block_group(trans, inode);
4418 btrfs_i_size_write(inode, inode->i_size); 4491 btrfs_i_size_write(inode, inode->i_size);
4419 4492
@@ -4490,12 +4563,15 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
4490 ei->i_acl = BTRFS_ACL_NOT_CACHED; 4563 ei->i_acl = BTRFS_ACL_NOT_CACHED;
4491 ei->i_default_acl = BTRFS_ACL_NOT_CACHED; 4564 ei->i_default_acl = BTRFS_ACL_NOT_CACHED;
4492 INIT_LIST_HEAD(&ei->i_orphan); 4565 INIT_LIST_HEAD(&ei->i_orphan);
4566 INIT_LIST_HEAD(&ei->ordered_operations);
4493 return &ei->vfs_inode; 4567 return &ei->vfs_inode;
4494} 4568}
4495 4569
4496void btrfs_destroy_inode(struct inode *inode) 4570void btrfs_destroy_inode(struct inode *inode)
4497{ 4571{
4498 struct btrfs_ordered_extent *ordered; 4572 struct btrfs_ordered_extent *ordered;
4573 struct btrfs_root *root = BTRFS_I(inode)->root;
4574
4499 WARN_ON(!list_empty(&inode->i_dentry)); 4575 WARN_ON(!list_empty(&inode->i_dentry));
4500 WARN_ON(inode->i_data.nrpages); 4576 WARN_ON(inode->i_data.nrpages);
4501 4577
@@ -4506,13 +4582,24 @@ void btrfs_destroy_inode(struct inode *inode)
4506 BTRFS_I(inode)->i_default_acl != BTRFS_ACL_NOT_CACHED) 4582 BTRFS_I(inode)->i_default_acl != BTRFS_ACL_NOT_CACHED)
4507 posix_acl_release(BTRFS_I(inode)->i_default_acl); 4583 posix_acl_release(BTRFS_I(inode)->i_default_acl);
4508 4584
4509 spin_lock(&BTRFS_I(inode)->root->list_lock); 4585 /*
4586 * Make sure we're properly removed from the ordered operation
4587 * lists.
4588 */
4589 smp_mb();
4590 if (!list_empty(&BTRFS_I(inode)->ordered_operations)) {
4591 spin_lock(&root->fs_info->ordered_extent_lock);
4592 list_del_init(&BTRFS_I(inode)->ordered_operations);
4593 spin_unlock(&root->fs_info->ordered_extent_lock);
4594 }
4595
4596 spin_lock(&root->list_lock);
4510 if (!list_empty(&BTRFS_I(inode)->i_orphan)) { 4597 if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
4511 printk(KERN_ERR "BTRFS: inode %lu: inode still on the orphan" 4598 printk(KERN_ERR "BTRFS: inode %lu: inode still on the orphan"
4512 " list\n", inode->i_ino); 4599 " list\n", inode->i_ino);
4513 dump_stack(); 4600 dump_stack();
4514 } 4601 }
4515 spin_unlock(&BTRFS_I(inode)->root->list_lock); 4602 spin_unlock(&root->list_lock);
4516 4603
4517 while (1) { 4604 while (1) {
4518 ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1); 4605 ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1);
@@ -4633,12 +4720,40 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
4633 if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) 4720 if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)
4634 return -EXDEV; 4721 return -EXDEV;
4635 4722
4636 ret = btrfs_check_free_space(root, 1, 0); 4723 ret = btrfs_check_metadata_free_space(root);
4637 if (ret) 4724 if (ret)
4638 goto out_unlock; 4725 goto out_unlock;
4639 4726
4727 /*
4728 * we're using rename to replace one file with another,
4729 * and the replacement file is large. Start IO on it now so
4730 * we don't add too much work to the end of the transaction
4731 */
4732 if (new_inode && old_inode && S_ISREG(old_inode->i_mode) &&
4733 new_inode->i_size &&
4734 old_inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT)
4735 filemap_flush(old_inode->i_mapping);
4736
4640 trans = btrfs_start_transaction(root, 1); 4737 trans = btrfs_start_transaction(root, 1);
4641 4738
4739 /*
4740 * make sure the inode gets flushed if it is replacing
4741 * something.
4742 */
4743 if (new_inode && new_inode->i_size &&
4744 old_inode && S_ISREG(old_inode->i_mode)) {
4745 btrfs_add_ordered_operation(trans, root, old_inode);
4746 }
4747
4748 /*
4749 * this is an ugly little race, but the rename is required to make
4750 * sure that if we crash, the inode is either at the old name
4751 * or the new one. pinning the log transaction lets us make sure
4752 * we don't allow a log commit to come in after we unlink the
4753 * name but before we add the new name back in.
4754 */
4755 btrfs_pin_log_trans(root);
4756
4642 btrfs_set_trans_block_group(trans, new_dir); 4757 btrfs_set_trans_block_group(trans, new_dir);
4643 4758
4644 btrfs_inc_nlink(old_dentry->d_inode); 4759 btrfs_inc_nlink(old_dentry->d_inode);
@@ -4646,6 +4761,9 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
4646 new_dir->i_ctime = new_dir->i_mtime = ctime; 4761 new_dir->i_ctime = new_dir->i_mtime = ctime;
4647 old_inode->i_ctime = ctime; 4762 old_inode->i_ctime = ctime;
4648 4763
4764 if (old_dentry->d_parent != new_dentry->d_parent)
4765 btrfs_record_unlink_dir(trans, old_dir, old_inode, 1);
4766
4649 ret = btrfs_unlink_inode(trans, root, old_dir, old_dentry->d_inode, 4767 ret = btrfs_unlink_inode(trans, root, old_dir, old_dentry->d_inode,
4650 old_dentry->d_name.name, 4768 old_dentry->d_name.name,
4651 old_dentry->d_name.len); 4769 old_dentry->d_name.len);
@@ -4677,7 +4795,14 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
4677 if (ret) 4795 if (ret)
4678 goto out_fail; 4796 goto out_fail;
4679 4797
4798 btrfs_log_new_name(trans, old_inode, old_dir,
4799 new_dentry->d_parent);
4680out_fail: 4800out_fail:
4801
4802 /* this btrfs_end_log_trans just allows the current
4803 * log-sub transaction to complete
4804 */
4805 btrfs_end_log_trans(root);
4681 btrfs_end_transaction_throttle(trans, root); 4806 btrfs_end_transaction_throttle(trans, root);
4682out_unlock: 4807out_unlock:
4683 return ret; 4808 return ret;
@@ -4751,7 +4876,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
4751 if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root)) 4876 if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root))
4752 return -ENAMETOOLONG; 4877 return -ENAMETOOLONG;
4753 4878
4754 err = btrfs_check_free_space(root, 1, 0); 4879 err = btrfs_check_metadata_free_space(root);
4755 if (err) 4880 if (err)
4756 goto out_fail; 4881 goto out_fail;
4757 4882