aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/file.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/file.c')
-rw-r--r--fs/btrfs/file.c216
1 files changed, 154 insertions, 62 deletions
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 82d0342763c5..0165b8672f09 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -692,7 +692,10 @@ next:
692int __btrfs_drop_extents(struct btrfs_trans_handle *trans, 692int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
693 struct btrfs_root *root, struct inode *inode, 693 struct btrfs_root *root, struct inode *inode,
694 struct btrfs_path *path, u64 start, u64 end, 694 struct btrfs_path *path, u64 start, u64 end,
695 u64 *drop_end, int drop_cache) 695 u64 *drop_end, int drop_cache,
696 int replace_extent,
697 u32 extent_item_size,
698 int *key_inserted)
696{ 699{
697 struct extent_buffer *leaf; 700 struct extent_buffer *leaf;
698 struct btrfs_file_extent_item *fi; 701 struct btrfs_file_extent_item *fi;
@@ -712,6 +715,7 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
712 int modify_tree = -1; 715 int modify_tree = -1;
713 int update_refs = (root->ref_cows || root == root->fs_info->tree_root); 716 int update_refs = (root->ref_cows || root == root->fs_info->tree_root);
714 int found = 0; 717 int found = 0;
718 int leafs_visited = 0;
715 719
716 if (drop_cache) 720 if (drop_cache)
717 btrfs_drop_extent_cache(inode, start, end - 1, 0); 721 btrfs_drop_extent_cache(inode, start, end - 1, 0);
@@ -733,6 +737,7 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
733 path->slots[0]--; 737 path->slots[0]--;
734 } 738 }
735 ret = 0; 739 ret = 0;
740 leafs_visited++;
736next_slot: 741next_slot:
737 leaf = path->nodes[0]; 742 leaf = path->nodes[0];
738 if (path->slots[0] >= btrfs_header_nritems(leaf)) { 743 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
@@ -744,6 +749,7 @@ next_slot:
744 ret = 0; 749 ret = 0;
745 break; 750 break;
746 } 751 }
752 leafs_visited++;
747 leaf = path->nodes[0]; 753 leaf = path->nodes[0];
748 recow = 1; 754 recow = 1;
749 } 755 }
@@ -766,7 +772,8 @@ next_slot:
766 btrfs_file_extent_num_bytes(leaf, fi); 772 btrfs_file_extent_num_bytes(leaf, fi);
767 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { 773 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
768 extent_end = key.offset + 774 extent_end = key.offset +
769 btrfs_file_extent_inline_len(leaf, fi); 775 btrfs_file_extent_inline_len(leaf,
776 path->slots[0], fi);
770 } else { 777 } else {
771 WARN_ON(1); 778 WARN_ON(1);
772 extent_end = search_start; 779 extent_end = search_start;
@@ -927,14 +934,44 @@ next_slot:
927 } 934 }
928 935
929 if (!ret && del_nr > 0) { 936 if (!ret && del_nr > 0) {
937 /*
938 * Set path->slots[0] to first slot, so that after the delete
939 * if items are moved off from our leaf to its immediate left or
940 * right neighbor leafs, we end up with a correct and adjusted
941 * path->slots[0] for our insertion.
942 */
943 path->slots[0] = del_slot;
930 ret = btrfs_del_items(trans, root, path, del_slot, del_nr); 944 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
931 if (ret) 945 if (ret)
932 btrfs_abort_transaction(trans, root, ret); 946 btrfs_abort_transaction(trans, root, ret);
947
948 leaf = path->nodes[0];
949 /*
950 * leaf eb has flag EXTENT_BUFFER_STALE if it was deleted (that
951 * is, its contents got pushed to its neighbors), in which case
952 * it means path->locks[0] == 0
953 */
954 if (!ret && replace_extent && leafs_visited == 1 &&
955 path->locks[0] &&
956 btrfs_leaf_free_space(root, leaf) >=
957 sizeof(struct btrfs_item) + extent_item_size) {
958
959 key.objectid = ino;
960 key.type = BTRFS_EXTENT_DATA_KEY;
961 key.offset = start;
962 setup_items_for_insert(root, path, &key,
963 &extent_item_size,
964 extent_item_size,
965 sizeof(struct btrfs_item) +
966 extent_item_size, 1);
967 *key_inserted = 1;
968 }
933 } 969 }
934 970
971 if (!replace_extent || !(*key_inserted))
972 btrfs_release_path(path);
935 if (drop_end) 973 if (drop_end)
936 *drop_end = found ? min(end, extent_end) : end; 974 *drop_end = found ? min(end, extent_end) : end;
937 btrfs_release_path(path);
938 return ret; 975 return ret;
939} 976}
940 977
@@ -949,7 +986,7 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans,
949 if (!path) 986 if (!path)
950 return -ENOMEM; 987 return -ENOMEM;
951 ret = __btrfs_drop_extents(trans, root, inode, path, start, end, NULL, 988 ret = __btrfs_drop_extents(trans, root, inode, path, start, end, NULL,
952 drop_cache); 989 drop_cache, 0, 0, NULL);
953 btrfs_free_path(path); 990 btrfs_free_path(path);
954 return ret; 991 return ret;
955} 992}
@@ -1235,29 +1272,18 @@ static int prepare_uptodate_page(struct page *page, u64 pos,
1235} 1272}
1236 1273
1237/* 1274/*
1238 * this gets pages into the page cache and locks them down, it also properly 1275 * this just gets pages into the page cache and locks them down.
1239 * waits for data=ordered extents to finish before allowing the pages to be
1240 * modified.
1241 */ 1276 */
1242static noinline int prepare_pages(struct btrfs_root *root, struct file *file, 1277static noinline int prepare_pages(struct inode *inode, struct page **pages,
1243 struct page **pages, size_t num_pages, 1278 size_t num_pages, loff_t pos,
1244 loff_t pos, unsigned long first_index, 1279 size_t write_bytes, bool force_uptodate)
1245 size_t write_bytes, bool force_uptodate)
1246{ 1280{
1247 struct extent_state *cached_state = NULL;
1248 int i; 1281 int i;
1249 unsigned long index = pos >> PAGE_CACHE_SHIFT; 1282 unsigned long index = pos >> PAGE_CACHE_SHIFT;
1250 struct inode *inode = file_inode(file);
1251 gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping); 1283 gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
1252 int err = 0; 1284 int err = 0;
1253 int faili = 0; 1285 int faili;
1254 u64 start_pos;
1255 u64 last_pos;
1256
1257 start_pos = pos & ~((u64)root->sectorsize - 1);
1258 last_pos = ((u64)index + num_pages) << PAGE_CACHE_SHIFT;
1259 1286
1260again:
1261 for (i = 0; i < num_pages; i++) { 1287 for (i = 0; i < num_pages; i++) {
1262 pages[i] = find_or_create_page(inode->i_mapping, index + i, 1288 pages[i] = find_or_create_page(inode->i_mapping, index + i,
1263 mask | __GFP_WRITE); 1289 mask | __GFP_WRITE);
@@ -1280,57 +1306,85 @@ again:
1280 } 1306 }
1281 wait_on_page_writeback(pages[i]); 1307 wait_on_page_writeback(pages[i]);
1282 } 1308 }
1283 faili = num_pages - 1; 1309
1284 err = 0; 1310 return 0;
1311fail:
1312 while (faili >= 0) {
1313 unlock_page(pages[faili]);
1314 page_cache_release(pages[faili]);
1315 faili--;
1316 }
1317 return err;
1318
1319}
1320
1321/*
1322 * This function locks the extent and properly waits for data=ordered extents
1323 * to finish before allowing the pages to be modified if need.
1324 *
1325 * The return value:
1326 * 1 - the extent is locked
1327 * 0 - the extent is not locked, and everything is OK
1328 * -EAGAIN - need to re-prepare the pages
1329 * any other value < 0 - an error occurred
1330 */
1331static noinline int
1332lock_and_cleanup_extent_if_need(struct inode *inode, struct page **pages,
1333 size_t num_pages, loff_t pos,
1334 u64 *lockstart, u64 *lockend,
1335 struct extent_state **cached_state)
1336{
1337 u64 start_pos;
1338 u64 last_pos;
1339 int i;
1340 int ret = 0;
1341
1342 start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1);
1343 last_pos = start_pos + ((u64)num_pages << PAGE_CACHE_SHIFT) - 1;
1344
1285 if (start_pos < inode->i_size) { 1345 if (start_pos < inode->i_size) {
1286 struct btrfs_ordered_extent *ordered; 1346 struct btrfs_ordered_extent *ordered;
1287 lock_extent_bits(&BTRFS_I(inode)->io_tree, 1347 lock_extent_bits(&BTRFS_I(inode)->io_tree,
1288 start_pos, last_pos - 1, 0, &cached_state); 1348 start_pos, last_pos, 0, cached_state);
1289 ordered = btrfs_lookup_first_ordered_extent(inode, 1349 ordered = btrfs_lookup_first_ordered_extent(inode, last_pos);
1290 last_pos - 1);
1291 if (ordered && 1350 if (ordered &&
1292 ordered->file_offset + ordered->len > start_pos && 1351 ordered->file_offset + ordered->len > start_pos &&
1293 ordered->file_offset < last_pos) { 1352 ordered->file_offset <= last_pos) {
1294 btrfs_put_ordered_extent(ordered); 1353 btrfs_put_ordered_extent(ordered);
1295 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 1354 unlock_extent_cached(&BTRFS_I(inode)->io_tree,
1296 start_pos, last_pos - 1, 1355 start_pos, last_pos,
1297 &cached_state, GFP_NOFS); 1356 cached_state, GFP_NOFS);
1298 for (i = 0; i < num_pages; i++) { 1357 for (i = 0; i < num_pages; i++) {
1299 unlock_page(pages[i]); 1358 unlock_page(pages[i]);
1300 page_cache_release(pages[i]); 1359 page_cache_release(pages[i]);
1301 } 1360 }
1302 err = btrfs_wait_ordered_range(inode, start_pos, 1361 ret = btrfs_wait_ordered_range(inode, start_pos,
1303 last_pos - start_pos); 1362 last_pos - start_pos + 1);
1304 if (err) 1363 if (ret)
1305 goto fail; 1364 return ret;
1306 goto again; 1365 else
1366 return -EAGAIN;
1307 } 1367 }
1308 if (ordered) 1368 if (ordered)
1309 btrfs_put_ordered_extent(ordered); 1369 btrfs_put_ordered_extent(ordered);
1310 1370
1311 clear_extent_bit(&BTRFS_I(inode)->io_tree, start_pos, 1371 clear_extent_bit(&BTRFS_I(inode)->io_tree, start_pos,
1312 last_pos - 1, EXTENT_DIRTY | EXTENT_DELALLOC | 1372 last_pos, EXTENT_DIRTY | EXTENT_DELALLOC |
1313 EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 1373 EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
1314 0, 0, &cached_state, GFP_NOFS); 1374 0, 0, cached_state, GFP_NOFS);
1315 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 1375 *lockstart = start_pos;
1316 start_pos, last_pos - 1, &cached_state, 1376 *lockend = last_pos;
1317 GFP_NOFS); 1377 ret = 1;
1318 } 1378 }
1379
1319 for (i = 0; i < num_pages; i++) { 1380 for (i = 0; i < num_pages; i++) {
1320 if (clear_page_dirty_for_io(pages[i])) 1381 if (clear_page_dirty_for_io(pages[i]))
1321 account_page_redirty(pages[i]); 1382 account_page_redirty(pages[i]);
1322 set_page_extent_mapped(pages[i]); 1383 set_page_extent_mapped(pages[i]);
1323 WARN_ON(!PageLocked(pages[i])); 1384 WARN_ON(!PageLocked(pages[i]));
1324 } 1385 }
1325 return 0;
1326fail:
1327 while (faili >= 0) {
1328 unlock_page(pages[faili]);
1329 page_cache_release(pages[faili]);
1330 faili--;
1331 }
1332 return err;
1333 1386
1387 return ret;
1334} 1388}
1335 1389
1336static noinline int check_can_nocow(struct inode *inode, loff_t pos, 1390static noinline int check_can_nocow(struct inode *inode, loff_t pos,
@@ -1381,13 +1435,17 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
1381 struct inode *inode = file_inode(file); 1435 struct inode *inode = file_inode(file);
1382 struct btrfs_root *root = BTRFS_I(inode)->root; 1436 struct btrfs_root *root = BTRFS_I(inode)->root;
1383 struct page **pages = NULL; 1437 struct page **pages = NULL;
1438 struct extent_state *cached_state = NULL;
1384 u64 release_bytes = 0; 1439 u64 release_bytes = 0;
1440 u64 lockstart;
1441 u64 lockend;
1385 unsigned long first_index; 1442 unsigned long first_index;
1386 size_t num_written = 0; 1443 size_t num_written = 0;
1387 int nrptrs; 1444 int nrptrs;
1388 int ret = 0; 1445 int ret = 0;
1389 bool only_release_metadata = false; 1446 bool only_release_metadata = false;
1390 bool force_page_uptodate = false; 1447 bool force_page_uptodate = false;
1448 bool need_unlock;
1391 1449
1392 nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) / 1450 nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) /
1393 PAGE_CACHE_SIZE, PAGE_CACHE_SIZE / 1451 PAGE_CACHE_SIZE, PAGE_CACHE_SIZE /
@@ -1456,18 +1514,31 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
1456 } 1514 }
1457 1515
1458 release_bytes = reserve_bytes; 1516 release_bytes = reserve_bytes;
1459 1517 need_unlock = false;
1518again:
1460 /* 1519 /*
1461 * This is going to setup the pages array with the number of 1520 * This is going to setup the pages array with the number of
1462 * pages we want, so we don't really need to worry about the 1521 * pages we want, so we don't really need to worry about the
1463 * contents of pages from loop to loop 1522 * contents of pages from loop to loop
1464 */ 1523 */
1465 ret = prepare_pages(root, file, pages, num_pages, 1524 ret = prepare_pages(inode, pages, num_pages,
1466 pos, first_index, write_bytes, 1525 pos, write_bytes,
1467 force_page_uptodate); 1526 force_page_uptodate);
1468 if (ret) 1527 if (ret)
1469 break; 1528 break;
1470 1529
1530 ret = lock_and_cleanup_extent_if_need(inode, pages, num_pages,
1531 pos, &lockstart, &lockend,
1532 &cached_state);
1533 if (ret < 0) {
1534 if (ret == -EAGAIN)
1535 goto again;
1536 break;
1537 } else if (ret > 0) {
1538 need_unlock = true;
1539 ret = 0;
1540 }
1541
1471 copied = btrfs_copy_from_user(pos, num_pages, 1542 copied = btrfs_copy_from_user(pos, num_pages,
1472 write_bytes, pages, i); 1543 write_bytes, pages, i);
1473 1544
@@ -1512,19 +1583,21 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
1512 } 1583 }
1513 1584
1514 release_bytes = dirty_pages << PAGE_CACHE_SHIFT; 1585 release_bytes = dirty_pages << PAGE_CACHE_SHIFT;
1515 if (copied > 0) { 1586
1587 if (copied > 0)
1516 ret = btrfs_dirty_pages(root, inode, pages, 1588 ret = btrfs_dirty_pages(root, inode, pages,
1517 dirty_pages, pos, copied, 1589 dirty_pages, pos, copied,
1518 NULL); 1590 NULL);
1519 if (ret) { 1591 if (need_unlock)
1520 btrfs_drop_pages(pages, num_pages); 1592 unlock_extent_cached(&BTRFS_I(inode)->io_tree,
1521 break; 1593 lockstart, lockend, &cached_state,
1522 } 1594 GFP_NOFS);
1595 if (ret) {
1596 btrfs_drop_pages(pages, num_pages);
1597 break;
1523 } 1598 }
1524 1599
1525 release_bytes = 0; 1600 release_bytes = 0;
1526 btrfs_drop_pages(pages, num_pages);
1527
1528 if (only_release_metadata && copied > 0) { 1601 if (only_release_metadata && copied > 0) {
1529 u64 lockstart = round_down(pos, root->sectorsize); 1602 u64 lockstart = round_down(pos, root->sectorsize);
1530 u64 lockend = lockstart + 1603 u64 lockend = lockstart +
@@ -1536,6 +1609,8 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
1536 only_release_metadata = false; 1609 only_release_metadata = false;
1537 } 1610 }
1538 1611
1612 btrfs_drop_pages(pages, num_pages);
1613
1539 cond_resched(); 1614 cond_resched();
1540 1615
1541 balance_dirty_pages_ratelimited(inode->i_mapping); 1616 balance_dirty_pages_ratelimited(inode->i_mapping);
@@ -1857,12 +1932,24 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
1857 if (file->private_data) 1932 if (file->private_data)
1858 btrfs_ioctl_trans_end(file); 1933 btrfs_ioctl_trans_end(file);
1859 1934
1935 /*
1936 * We use start here because we will need to wait on the IO to complete
1937 * in btrfs_sync_log, which could require joining a transaction (for
1938 * example checking cross references in the nocow path). If we use join
1939 * here we could get into a situation where we're waiting on IO to
1940 * happen that is blocked on a transaction trying to commit. With start
1941 * we inc the extwriter counter, so we wait for all extwriters to exit
1942 * before we start blocking join'ers. This comment is to keep somebody
1943 * from thinking they are super smart and changing this to
1944 * btrfs_join_transaction *cough*Josef*cough*.
1945 */
1860 trans = btrfs_start_transaction(root, 0); 1946 trans = btrfs_start_transaction(root, 0);
1861 if (IS_ERR(trans)) { 1947 if (IS_ERR(trans)) {
1862 ret = PTR_ERR(trans); 1948 ret = PTR_ERR(trans);
1863 mutex_unlock(&inode->i_mutex); 1949 mutex_unlock(&inode->i_mutex);
1864 goto out; 1950 goto out;
1865 } 1951 }
1952 trans->sync = true;
1866 1953
1867 ret = btrfs_log_dentry_safe(trans, root, dentry); 1954 ret = btrfs_log_dentry_safe(trans, root, dentry);
1868 if (ret < 0) { 1955 if (ret < 0) {
@@ -1963,11 +2050,13 @@ static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode,
1963 struct btrfs_key key; 2050 struct btrfs_key key;
1964 int ret; 2051 int ret;
1965 2052
2053 if (btrfs_fs_incompat(root->fs_info, NO_HOLES))
2054 goto out;
2055
1966 key.objectid = btrfs_ino(inode); 2056 key.objectid = btrfs_ino(inode);
1967 key.type = BTRFS_EXTENT_DATA_KEY; 2057 key.type = BTRFS_EXTENT_DATA_KEY;
1968 key.offset = offset; 2058 key.offset = offset;
1969 2059
1970
1971 ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 2060 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
1972 if (ret < 0) 2061 if (ret < 0)
1973 return ret; 2062 return ret;
@@ -2064,8 +2153,10 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
2064 u64 drop_end; 2153 u64 drop_end;
2065 int ret = 0; 2154 int ret = 0;
2066 int err = 0; 2155 int err = 0;
2156 int rsv_count;
2067 bool same_page = ((offset >> PAGE_CACHE_SHIFT) == 2157 bool same_page = ((offset >> PAGE_CACHE_SHIFT) ==
2068 ((offset + len - 1) >> PAGE_CACHE_SHIFT)); 2158 ((offset + len - 1) >> PAGE_CACHE_SHIFT));
2159 bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
2069 2160
2070 ret = btrfs_wait_ordered_range(inode, offset, len); 2161 ret = btrfs_wait_ordered_range(inode, offset, len);
2071 if (ret) 2162 if (ret)
@@ -2125,7 +2216,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
2125 * we need to try again. 2216 * we need to try again.
2126 */ 2217 */
2127 if ((!ordered || 2218 if ((!ordered ||
2128 (ordered->file_offset + ordered->len < lockstart || 2219 (ordered->file_offset + ordered->len <= lockstart ||
2129 ordered->file_offset > lockend)) && 2220 ordered->file_offset > lockend)) &&
2130 !test_range_bit(&BTRFS_I(inode)->io_tree, lockstart, 2221 !test_range_bit(&BTRFS_I(inode)->io_tree, lockstart,
2131 lockend, EXTENT_UPTODATE, 0, 2222 lockend, EXTENT_UPTODATE, 0,
@@ -2163,9 +2254,10 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
2163 /* 2254 /*
2164 * 1 - update the inode 2255 * 1 - update the inode
2165 * 1 - removing the extents in the range 2256 * 1 - removing the extents in the range
2166 * 1 - adding the hole extent 2257 * 1 - adding the hole extent if no_holes isn't set
2167 */ 2258 */
2168 trans = btrfs_start_transaction(root, 3); 2259 rsv_count = no_holes ? 2 : 3;
2260 trans = btrfs_start_transaction(root, rsv_count);
2169 if (IS_ERR(trans)) { 2261 if (IS_ERR(trans)) {
2170 err = PTR_ERR(trans); 2262 err = PTR_ERR(trans);
2171 goto out_free; 2263 goto out_free;
@@ -2179,7 +2271,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
2179 while (cur_offset < lockend) { 2271 while (cur_offset < lockend) {
2180 ret = __btrfs_drop_extents(trans, root, inode, path, 2272 ret = __btrfs_drop_extents(trans, root, inode, path,
2181 cur_offset, lockend + 1, 2273 cur_offset, lockend + 1,
2182 &drop_end, 1); 2274 &drop_end, 1, 0, 0, NULL);
2183 if (ret != -ENOSPC) 2275 if (ret != -ENOSPC)
2184 break; 2276 break;
2185 2277
@@ -2202,7 +2294,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
2202 btrfs_end_transaction(trans, root); 2294 btrfs_end_transaction(trans, root);
2203 btrfs_btree_balance_dirty(root); 2295 btrfs_btree_balance_dirty(root);
2204 2296
2205 trans = btrfs_start_transaction(root, 3); 2297 trans = btrfs_start_transaction(root, rsv_count);
2206 if (IS_ERR(trans)) { 2298 if (IS_ERR(trans)) {
2207 ret = PTR_ERR(trans); 2299 ret = PTR_ERR(trans);
2208 trans = NULL; 2300 trans = NULL;