author    Chris Mason <chris.mason@oracle.com>      2007-08-27 16:49:44 -0400
committer David Woodhouse <dwmw2@hera.kernel.org>  2007-08-27 16:49:44 -0400
commit    a52d9a8033c454cd9b4697cfafb467509fc1693f
tree      83d8bbf2df29598ea9c60498fadf5990262a11c8
parent    83df7c1d8b885e216c68c3f0469bec9b93666b74
Btrfs: Extent based page cache code.  This uses an rbtree of extents and tests
instead of buffer heads.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/inode.c')
 -rw-r--r--  fs/btrfs/inode.c  816
 1 file changed, 306 insertions, 510 deletions
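
Background sketch: "extent based" here means cached file state is keyed by byte
ranges held in a per-inode red-black tree (see the extent_map_tree_init() calls
added below), instead of per-block buffer heads. A minimal range lookup over
such a tree, assuming only the stock <linux/rbtree.h> API -- my_extent and
my_lookup are illustrative names, not identifiers from this patch -- could look
like:

	#include <linux/rbtree.h>
	#include <linux/types.h>

	/*
	 * Hypothetical extent record for illustration only; the patch's own
	 * struct extent_map (not part of this file) carries more state, such
	 * as the disk block mapping and a reference count.
	 */
	struct my_extent {
		struct rb_node rb;	/* links the extent into the tree */
		u64 start;		/* first byte covered */
		u64 end;		/* last byte covered, inclusive */
	};

	/* Return the extent whose [start, end] range contains offset, or NULL. */
	static struct my_extent *my_lookup(struct rb_root *root, u64 offset)
	{
		struct rb_node *node = root->rb_node;

		while (node) {
			struct my_extent *em = rb_entry(node, struct my_extent, rb);

			if (offset < em->start)
				node = node->rb_left;
			else if (offset > em->end)
				node = node->rb_right;
			else
				return em;	/* offset falls inside this extent */
		}
		return NULL;
	}

Because cached ranges are kept non-overlapping, a single descent answers "which
extent covers this offset?" in O(log n) where the buffer-head code walked
per-block state; this is the shape of tree that lookup_extent_mapping() and
add_extent_mapping() operate on in the code below.
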
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 504d37dfa6c9..fd4cb65fcf1f 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -121,6 +121,8 @@ void btrfs_read_locked_inode(struct inode *inode)
 	switch (inode->i_mode & S_IFMT) {
 	case S_IFREG:
 		inode->i_mapping->a_ops = &btrfs_aops;
+		extent_map_tree_init(&BTRFS_I(inode)->extent_tree,
+				     inode->i_mapping, GFP_NOFS);
 		inode->i_fop = &btrfs_file_operations;
 		inode->i_op = &btrfs_file_inode_operations;
 		break;
@@ -169,7 +171,7 @@ static void fill_inode_item(struct btrfs_inode_item *item,
 			 BTRFS_I(inode)->block_group->key.objectid);
 }
 
-static int btrfs_update_inode(struct btrfs_trans_handle *trans,
+int btrfs_update_inode(struct btrfs_trans_handle *trans,
 			      struct btrfs_root *root,
 			      struct inode *inode)
 {
@@ -400,6 +402,7 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans,
 	int found_extent;
 	int del_item;
 
+	btrfs_drop_extent_cache(inode, inode->i_size, (u64)-1);
 	path = btrfs_alloc_path();
 	path->reada = -1;
 	BUG_ON(!path);
@@ -511,6 +514,50 @@ error:
 	return ret;
 }
 
+static int btrfs_cow_one_page(struct btrfs_trans_handle *trans,
+			      struct inode *inode, struct page *page,
+			      size_t zero_start)
+{
+	char *kaddr;
+	int ret = 0;
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	u64 alloc_hint = 0;
+	u64 page_start = page->index << PAGE_CACHE_SHIFT;
+	struct btrfs_key ins;
+
+	if (!PagePrivate(page)) {
+		SetPagePrivate(page);
+		set_page_private(page, 1);
+		page_cache_get(page);
+	}
+
+	btrfs_set_trans_block_group(trans, inode);
+
+	ret = btrfs_drop_extents(trans, root, inode,
+				 page_start, page_start + PAGE_CACHE_SIZE,
+				 &alloc_hint);
+	if (ret)
+		goto out;
+	ret = btrfs_alloc_extent(trans, root, inode->i_ino, 1, 0,
+				 alloc_hint, (u64)-1, &ins, 1);
+	if (ret)
+		goto out;
+	ret = btrfs_insert_file_extent(trans, root, inode->i_ino,
+				       page_start, ins.objectid, 1, 1);
+	if (ret)
+		goto out;
+	SetPageChecked(page);
+	kaddr = kmap(page);
+	if (zero_start != PAGE_CACHE_SIZE) {
+		memset(kaddr + zero_start, 0, PAGE_CACHE_SIZE - zero_start);
+		flush_dcache_page(page);
+	}
+	kunmap(page);
+
+out:
+	return ret;
+}
+
 /*
  * taken from block_truncate_page, but does cow as it zeros out
  * any bytes left in the last page in the file.
@@ -518,16 +565,14 @@ error:
 static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
 {
 	struct inode *inode = mapping->host;
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	struct btrfs_trans_handle *trans;
 	unsigned blocksize = 1 << inode->i_blkbits;
 	pgoff_t index = from >> PAGE_CACHE_SHIFT;
 	unsigned offset = from & (PAGE_CACHE_SIZE-1);
 	struct page *page;
-	char *kaddr;
 	int ret = 0;
-	struct btrfs_root *root = BTRFS_I(inode)->root;
-	u64 alloc_hint = 0;
-	struct btrfs_key ins;
-	struct btrfs_trans_handle *trans;
+	u64 page_start;
 
 	if ((offset & (blocksize - 1)) == 0)
 		goto out;
@@ -536,7 +581,6 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
 	page = grab_cache_page(mapping, index);
 	if (!page)
 		goto out;
-
 	if (!PageUptodate(page)) {
 		ret = btrfs_readpage(NULL, page);
 		lock_page(page);
@@ -545,37 +589,24 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
 			goto out;
 		}
 	}
+	page_start = page->index << PAGE_CACHE_SHIFT;
+
 	mutex_lock(&root->fs_info->fs_mutex);
 	trans = btrfs_start_transaction(root, 1);
-	btrfs_set_trans_block_group(trans, inode);
-
-	ret = btrfs_drop_extents(trans, root, inode,
-				 page->index << PAGE_CACHE_SHIFT,
-				 (page->index + 1) << PAGE_CACHE_SHIFT,
-				 &alloc_hint);
-	if (ret)
-		goto out;
-	ret = btrfs_alloc_extent(trans, root, inode->i_ino, 1, 0,
-				 alloc_hint, (u64)-1, &ins, 1);
-	if (ret)
-		goto out;
-	ret = btrfs_insert_file_extent(trans, root, inode->i_ino,
-				       page->index << PAGE_CACHE_SHIFT,
-				       ins.objectid, 1, 1);
-	if (ret)
-		goto out;
-	SetPageChecked(page);
-	kaddr = kmap(page);
-	memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
-	flush_dcache_page(page);
-	ret = btrfs_csum_file_block(trans, root, inode->i_ino,
-				    page->index << PAGE_CACHE_SHIFT,
-				    kaddr, PAGE_CACHE_SIZE);
-	kunmap(page);
+	ret = btrfs_cow_one_page(trans, inode, page, offset);
+	if (!ret) {
+		char *kaddr = kmap(page);
+		ret = btrfs_csum_file_block(trans, root, inode->i_ino,
+					    page_start, kaddr, PAGE_CACHE_SIZE);
+		kunmap(page);
+	}
+	set_extent_dirty(&BTRFS_I(inode)->extent_tree,
+			 page_start, page_start + PAGE_CACHE_SIZE - 1,
+			 GFP_NOFS);
+	set_page_dirty(page);
 	btrfs_end_transaction(trans, root);
 	mutex_unlock(&root->fs_info->fs_mutex);
 
-	set_page_dirty(page);
 	unlock_page(page);
 	page_cache_release(page);
 out:
@@ -1095,6 +1126,8 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
 		inode->i_mapping->a_ops = &btrfs_aops;
 		inode->i_fop = &btrfs_file_operations;
 		inode->i_op = &btrfs_file_inode_operations;
+		extent_map_tree_init(&BTRFS_I(inode)->extent_tree,
+				     inode->i_mapping, GFP_NOFS);
 	}
 	dir->i_sb->s_dirt = 1;
 	btrfs_update_inode_block_group(trans, inode);
@@ -1238,6 +1271,182 @@ out_unlock:
 	return err;
 }
 
+struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
+				    size_t page_offset, u64 start, u64 end,
+				    int create)
+{
+	int ret;
+	int err = 0;
+	u64 blocknr;
+	u64 extent_start = 0;
+	u64 extent_end = 0;
+	u64 objectid = inode->i_ino;
+	u32 found_type;
+	int failed_insert = 0;
+	struct btrfs_path *path;
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	struct btrfs_file_extent_item *item;
+	struct btrfs_leaf *leaf;
+	struct btrfs_disk_key *found_key;
+	struct extent_map *em = NULL;
+	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+	struct btrfs_trans_handle *trans = NULL;
+
+	path = btrfs_alloc_path();
+	BUG_ON(!path);
+	mutex_lock(&root->fs_info->fs_mutex);
+
+again:
+	em = lookup_extent_mapping(em_tree, start, end);
+	if (em) {
+		goto out;
+	}
+	if (!em) {
+		em = alloc_extent_map(GFP_NOFS);
+		if (!em) {
+			err = -ENOMEM;
+			goto out;
+		}
+		em->start = 0;
+		em->end = 0;
+	}
+	em->bdev = inode->i_sb->s_bdev;
+	ret = btrfs_lookup_file_extent(NULL, root, path,
+				       objectid, start, 0);
+	if (ret < 0) {
+		err = ret;
+		goto out;
+	}
+
+	if (ret != 0) {
+		if (path->slots[0] == 0)
+			goto not_found;
+		path->slots[0]--;
+	}
+
+	item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0],
+			      struct btrfs_file_extent_item);
+	leaf = btrfs_buffer_leaf(path->nodes[0]);
+	blocknr = btrfs_file_extent_disk_blocknr(item);
+	blocknr += btrfs_file_extent_offset(item);
+
+	/* are we inside the extent that was found? */
+	found_key = &leaf->items[path->slots[0]].key;
+	found_type = btrfs_disk_key_type(found_key);
+	if (btrfs_disk_key_objectid(found_key) != objectid ||
+	    found_type != BTRFS_EXTENT_DATA_KEY) {
+		goto not_found;
+	}
+
+	found_type = btrfs_file_extent_type(item);
+	extent_start = btrfs_disk_key_offset(&leaf->items[path->slots[0]].key);
+	if (found_type == BTRFS_FILE_EXTENT_REG) {
+		extent_end = extent_start +
+			(btrfs_file_extent_num_blocks(item) << inode->i_blkbits);
+		err = 0;
+		if (start < extent_start || start > extent_end) {
+			em->start = start;
+			if (start < extent_start) {
+				em->end = extent_end - 1;
+			} else {
+				em->end = end;
+			}
+			goto not_found_em;
+		}
+		if (btrfs_file_extent_disk_blocknr(item) == 0) {
+			em->start = extent_start;
+			em->end = extent_end - 1;
+			em->block_start = 0;
+			em->block_end = 0;
+			goto insert;
+		}
+		em->block_start = blocknr << inode->i_blkbits;
+		em->block_end = em->block_start +
+			(btrfs_file_extent_num_blocks(item) <<
+			 inode->i_blkbits) - 1;
+		em->start = extent_start;
+		em->end = extent_end - 1;
+		goto insert;
+	} else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
+		char *ptr;
+		char *map;
+		u32 size;
+
+		size = btrfs_file_extent_inline_len(leaf->items +
+						    path->slots[0]);
+		extent_end = extent_start + size;
+		if (start < extent_start || start > extent_end) {
+			em->start = start;
+			if (start < extent_start) {
+				em->end = extent_end - 1;
+			} else {
+				em->end = end;
+			}
+			goto not_found_em;
+		}
+		em->block_start = EXTENT_MAP_INLINE;
+		em->block_end = EXTENT_MAP_INLINE;
+		em->start = extent_start;
+		em->end = extent_end - 1;
+		if (!page) {
+			goto insert;
+		}
+		ptr = btrfs_file_extent_inline_start(item);
+		map = kmap(page);
+		memcpy(map + page_offset, ptr, size);
+		flush_dcache_page(page);
+		kunmap(page);
+		set_extent_uptodate(em_tree, extent_start,
+				    extent_end, GFP_NOFS);
+		goto insert;
+	} else {
+		printk("unknown found_type %d\n", found_type);
+		WARN_ON(1);
+	}
+not_found:
+	em->start = start;
+	em->end = end;
+not_found_em:
+	em->block_start = 0;
+	em->block_end = 0;
+insert:
+	btrfs_release_path(root, path);
+	if (em->start > start || em->end < start) {
+		printk("bad extent! %Lu %Lu start %Lu end %Lu\n", em->start, em->end, start, end);
+		WARN_ON(1);
+		err = -EIO;
+		goto out;
+	}
+	ret = add_extent_mapping(em_tree, em);
+	if (ret == -EEXIST) {
+		free_extent_map(em);
+		failed_insert++;
+		if (failed_insert > 5) {
+			printk("failing to insert %Lu %Lu\n", start, end);
+			err = -EIO;
+			goto out;
+		}
+		em = NULL;
+		goto again;
+	}
+	err = 0;
+out:
+	btrfs_free_path(path);
+	if (trans) {
+		ret = btrfs_end_transaction(trans, root);
+		if (!err)
+			err = ret;
+	}
+	mutex_unlock(&root->fs_info->fs_mutex);
+	if (err) {
+		free_extent_map(em);
+		WARN_ON(1);
+		return ERR_PTR(err);
+	}
+	return em;
+}
+
+
 /*
  * FIBMAP and others want to pass in a fake buffer head. They need to
  * use BTRFS_GET_BLOCK_NO_DIRECT to make sure we don't try to memcpy
@@ -1398,46 +1607,22 @@ int btrfs_get_block(struct inode *inode, sector_t iblock,
 	return err;
 }
 
-static int btrfs_get_block_csum(struct inode *inode, sector_t iblock,
-				struct buffer_head *result, int create)
-{
-	int ret;
-	struct btrfs_root *root = BTRFS_I(inode)->root;
-	struct page *page = result->b_page;
-	u64 offset = (page->index << PAGE_CACHE_SHIFT) + bh_offset(result);
-	struct btrfs_csum_item *item;
-	struct btrfs_path *path = NULL;
-
-	mutex_lock(&root->fs_info->fs_mutex);
-	ret = btrfs_get_block_lock(inode, iblock, result, create);
-	if (ret)
-		goto out;
-
-	path = btrfs_alloc_path();
-	item = btrfs_lookup_csum(NULL, root, path, inode->i_ino, offset, 0);
-	if (IS_ERR(item)) {
-		ret = PTR_ERR(item);
-		/* a csum that isn't present is a preallocated region. */
-		if (ret == -ENOENT || ret == -EFBIG)
-			ret = 0;
-		result->b_private = NULL;
-		goto out;
-	}
-	memcpy((char *)&result->b_private, &item->csum, BTRFS_CRC32_SIZE);
-out:
-	if (path)
-		btrfs_free_path(path);
-	mutex_unlock(&root->fs_info->fs_mutex);
-	return ret;
-}
-
 static int btrfs_get_block_bmap(struct inode *inode, sector_t iblock,
 				struct buffer_head *result, int create)
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
-	mutex_lock(&root->fs_info->fs_mutex);
-	btrfs_get_block_lock(inode, iblock, result, BTRFS_GET_BLOCK_NO_DIRECT);
-	mutex_unlock(&root->fs_info->fs_mutex);
+	u64 start = iblock << inode->i_blkbits;
+	u64 end = start + root->blocksize - 1;
+	struct extent_map *em;
+
+	em = btrfs_get_extent(inode, NULL, 0, start, end, 0);
+	if (em && !IS_ERR(em) && em->block_start != EXTENT_MAP_INLINE &&
+	    em->block_start != 0) {
+		u64 offset;
+		offset = start - em->start;
+		start = (em->block_start + offset) >> inode->i_blkbits;
+		btrfs_map_bh_to_logical(root, result, start);
+	}
 	return 0;
 }
 
@@ -1449,442 +1634,50 @@ static sector_t btrfs_bmap(struct address_space *as, sector_t block)
 static int btrfs_prepare_write(struct file *file, struct page *page,
 			       unsigned from, unsigned to)
 {
-	return block_prepare_write(page, from, to, btrfs_get_block);
+	return extent_prepare_write(&BTRFS_I(page->mapping->host)->extent_tree,
+				    page->mapping->host, page, from, to,
+				    btrfs_get_extent);
 }
 
-static void buffer_io_error(struct buffer_head *bh)
+int btrfs_readpage(struct file *file, struct page *page)
 {
-	char b[BDEVNAME_SIZE];
-
-	printk(KERN_ERR "Buffer I/O error on device %s, logical block %Lu\n",
-	       bdevname(bh->b_bdev, b),
-	       (unsigned long long)bh->b_blocknr);
+	struct extent_map_tree *tree;
+	tree = &BTRFS_I(page->mapping->host)->extent_tree;
+	return extent_read_full_page(tree, page, btrfs_get_extent);
 }
-
-/*
- * I/O completion handler for block_read_full_page() - pages
- * which come unlocked at the end of I/O.
- */
-static void btrfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
+static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
 {
-	unsigned long flags;
-	struct buffer_head *first;
-	struct buffer_head *tmp;
-	struct page *page;
-	int page_uptodate = 1;
-	struct inode *inode;
-	int ret;
-
-	BUG_ON(!buffer_async_read(bh));
-
-	page = bh->b_page;
-	inode = page->mapping->host;
-	if (uptodate) {
-		void *kaddr;
-		struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
-		if (bh->b_private) {
-			char csum[BTRFS_CRC32_SIZE];
-			kaddr = kmap_atomic(page, KM_IRQ0);
-			ret = btrfs_csum_data(root, kaddr + bh_offset(bh),
-					      bh->b_size, csum);
-			BUG_ON(ret);
-			if (memcmp(csum, &bh->b_private, BTRFS_CRC32_SIZE)) {
-				u64 offset;
-				offset = (page->index << PAGE_CACHE_SHIFT) +
-					bh_offset(bh);
-				printk("btrfs csum failed ino %lu off %llu\n",
-				       page->mapping->host->i_ino,
-				       (unsigned long long)offset);
-				memset(kaddr + bh_offset(bh), 1, bh->b_size);
-				flush_dcache_page(page);
-			}
-			kunmap_atomic(kaddr, KM_IRQ0);
-		}
-		set_buffer_uptodate(bh);
-	} else {
-		clear_buffer_uptodate(bh);
-		if (printk_ratelimit())
-			buffer_io_error(bh);
-		SetPageError(page);
-	}
-
-	/*
-	 * Be _very_ careful from here on. Bad things can happen if
-	 * two buffer heads end IO at almost the same time and both
-	 * decide that the page is now completely done.
-	 */
-	first = page_buffers(page);
-	local_irq_save(flags);
-	bit_spin_lock(BH_Uptodate_Lock, &first->b_state);
-	clear_buffer_async_read(bh);
-	unlock_buffer(bh);
-	tmp = bh;
-	do {
-		if (!buffer_uptodate(tmp))
-			page_uptodate = 0;
-		if (buffer_async_read(tmp)) {
-			BUG_ON(!buffer_locked(tmp));
-			goto still_busy;
-		}
-		tmp = tmp->b_this_page;
-	} while (tmp != bh);
-	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
-	local_irq_restore(flags);
-
-	/*
-	 * If none of the buffers had errors and they are all
-	 * uptodate then we can set the page uptodate.
-	 */
-	if (page_uptodate && !PageError(page))
-		SetPageUptodate(page);
-	unlock_page(page);
-	return;
-
-still_busy:
-	bit_spin_unlock(BH_Uptodate_Lock, &first->b_state);
-	local_irq_restore(flags);
-	return;
+	struct extent_map_tree *tree;
+	tree = &BTRFS_I(page->mapping->host)->extent_tree;
+	return extent_write_full_page(tree, page, btrfs_get_extent, wbc);
 }
 
-/*
- * Generic "read page" function for block devices that have the normal
- * get_block functionality. This is most of the block device filesystems.
- * Reads the page asynchronously --- the unlock_buffer() and
- * set/clear_buffer_uptodate() functions propagate buffer state into the
- * page struct once IO has completed.
- */
-int btrfs_readpage(struct file *file, struct page *page)
+static int btrfs_releasepage(struct page *page, gfp_t unused_gfp_flags)
 {
-	struct inode *inode = page->mapping->host;
-	sector_t iblock, lblock;
-	struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
-	unsigned int blocksize;
-	int nr, i;
-	int fully_mapped = 1;
-
-	BUG_ON(!PageLocked(page));
-	blocksize = 1 << inode->i_blkbits;
-	if (!page_has_buffers(page))
-		create_empty_buffers(page, blocksize, 0);
-	head = page_buffers(page);
-
-	iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
-	lblock = (i_size_read(inode)+blocksize-1) >> inode->i_blkbits;
-	bh = head;
-	nr = 0;
-	i = 0;
-
-	do {
-		if (buffer_uptodate(bh))
-			continue;
-
-		if (!buffer_mapped(bh)) {
-			int err = 0;
-
-			fully_mapped = 0;
-			if (iblock < lblock) {
-				WARN_ON(bh->b_size != blocksize);
-				err = btrfs_get_block_csum(inode, iblock,
-							   bh, 0);
-				if (err)
-					SetPageError(page);
-			}
-			if (!buffer_mapped(bh)) {
-				void *kaddr = kmap_atomic(page, KM_USER0);
-				memset(kaddr + i * blocksize, 0, blocksize);
-				flush_dcache_page(page);
-				kunmap_atomic(kaddr, KM_USER0);
-				if (!err)
-					set_buffer_uptodate(bh);
-				continue;
-			}
-			/*
-			 * get_block() might have updated the buffer
-			 * synchronously
-			 */
-			if (buffer_uptodate(bh))
-				continue;
-		}
-		arr[nr++] = bh;
-	} while (i++, iblock++, (bh = bh->b_this_page) != head);
-
-	if (fully_mapped)
-		SetPageMappedToDisk(page);
-
-	if (!nr) {
-		/*
-		 * All buffers are uptodate - we can set the page uptodate
-		 * as well. But not if get_block() returned an error.
-		 */
-		if (!PageError(page))
-			SetPageUptodate(page);
-		unlock_page(page);
-		return 0;
-	}
-
-	/* Stage two: lock the buffers */
-	for (i = 0; i < nr; i++) {
-		bh = arr[i];
-		lock_buffer(bh);
-		bh->b_end_io = btrfs_end_buffer_async_read;
-		set_buffer_async_read(bh);
-	}
-
-	/*
-	 * Stage 3: start the IO. Check for uptodateness
-	 * inside the buffer lock in case another process reading
-	 * the underlying blockdev brought it uptodate (the sct fix).
-	 */
-	for (i = 0; i < nr; i++) {
-		bh = arr[i];
-		if (buffer_uptodate(bh))
-			btrfs_end_buffer_async_read(bh, 1);
-		else
-			submit_bh(READ, bh);
-	}
-	return 0;
-}
-
-/*
- * Aside from a tiny bit of packed file data handling, this is the
- * same as the generic code.
- *
- * While block_write_full_page is writing back the dirty buffers under
- * the page lock, whoever dirtied the buffers may decide to clean them
- * again at any time. We handle that by only looking at the buffer
- * state inside lock_buffer().
- *
- * If block_write_full_page() is called for regular writeback
- * (wbc->sync_mode == WB_SYNC_NONE) then it will redirty a page which has a
- * locked buffer. This only can happen if someone has written the buffer
- * directly, with submit_bh(). At the address_space level PageWriteback
- * prevents this contention from occurring.
- */
-static int __btrfs_write_full_page(struct inode *inode, struct page *page,
-				   struct writeback_control *wbc)
-{
-	int err;
-	sector_t block;
-	sector_t last_block;
-	struct buffer_head *bh, *head;
-	const unsigned blocksize = 1 << inode->i_blkbits;
-	int nr_underway = 0;
-	struct btrfs_root *root = BTRFS_I(inode)->root;
-
-	BUG_ON(!PageLocked(page));
-
-	last_block = (i_size_read(inode) - 1) >> inode->i_blkbits;
-
-	/* no csumming allowed when from PF_MEMALLOC */
-	if (current->flags & PF_MEMALLOC) {
-		redirty_page_for_writepage(wbc, page);
-		unlock_page(page);
-		return 0;
-	}
+	struct extent_map_tree *tree;
+	int ret;
 
-	if (!page_has_buffers(page)) {
-		create_empty_buffers(page, blocksize,
-				     (1 << BH_Dirty)|(1 << BH_Uptodate));
+	if (page->private != 1) {
+		WARN_ON(1);
+		return try_to_free_buffers(page);
 	}
-
-	/*
-	 * Be very careful. We have no exclusion from __set_page_dirty_buffers
-	 * here, and the (potentially unmapped) buffers may become dirty at
-	 * any time. If a buffer becomes dirty here after we've inspected it
-	 * then we just miss that fact, and the page stays dirty.
-	 *
-	 * Buffers outside i_size may be dirtied by __set_page_dirty_buffers;
-	 * handle that here by just cleaning them.
-	 */
-
-	block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
-	head = page_buffers(page);
-	bh = head;
-
-	/*
-	 * Get all the dirty buffers mapped to disk addresses and
-	 * handle any aliases from the underlying blockdev's mapping.
-	 */
-	do {
-		if (block > last_block) {
-			/*
-			 * mapped buffers outside i_size will occur, because
-			 * this page can be outside i_size when there is a
-			 * truncate in progress.
-			 */
-			/*
-			 * The buffer was zeroed by block_write_full_page()
-			 */
-			clear_buffer_dirty(bh);
-			set_buffer_uptodate(bh);
-		} else if (!buffer_mapped(bh) && buffer_dirty(bh)) {
-			WARN_ON(bh->b_size != blocksize);
-			err = btrfs_get_block(inode, block, bh, 0);
-			if (err) {
-				goto recover;
-			}
-			if (buffer_new(bh)) {
-				/* blockdev mappings never come here */
-				clear_buffer_new(bh);
-			}
-		}
-		bh = bh->b_this_page;
-		block++;
-	} while (bh != head);
-
-	do {
-		if (!buffer_mapped(bh))
-			continue;
-		/*
-		 * If it's a fully non-blocking write attempt and we cannot
-		 * lock the buffer then redirty the page. Note that this can
-		 * potentially cause a busy-wait loop from pdflush and kswapd
-		 * activity, but those code paths have their own higher-level
-		 * throttling.
-		 */
-		if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) {
-			lock_buffer(bh);
-		} else if (test_set_buffer_locked(bh)) {
-			redirty_page_for_writepage(wbc, page);
-			continue;
-		}
-		if (test_clear_buffer_dirty(bh) && bh->b_blocknr != 0) {
-			struct btrfs_trans_handle *trans;
-			int ret;
-			u64 off = page->index << PAGE_CACHE_SHIFT;
-			char *kaddr;
-
-			off += bh_offset(bh);
-			mutex_lock(&root->fs_info->fs_mutex);
-			trans = btrfs_start_transaction(root, 1);
-			btrfs_set_trans_block_group(trans, inode);
-			kaddr = kmap(page);
-			btrfs_csum_file_block(trans, root, inode->i_ino,
-					      off, kaddr + bh_offset(bh),
-					      bh->b_size);
-			kunmap(page);
-			ret = btrfs_end_transaction(trans, root);
-			BUG_ON(ret);
-			mutex_unlock(&root->fs_info->fs_mutex);
-			mark_buffer_async_write(bh);
-		} else {
-			unlock_buffer(bh);
-		}
-	} while ((bh = bh->b_this_page) != head);
-
-	/*
-	 * The page and its buffers are protected by PageWriteback(), so we can
-	 * drop the bh refcounts early.
-	 */
-	BUG_ON(PageWriteback(page));
-	set_page_writeback(page);
-
-	do {
-		struct buffer_head *next = bh->b_this_page;
-		if (buffer_async_write(bh)) {
-			submit_bh(WRITE, bh);
-			nr_underway++;
-		}
-		bh = next;
-	} while (bh != head);
-	unlock_page(page);
-
-	err = 0;
-done:
-	if (nr_underway == 0) {
-		/*
-		 * The page was marked dirty, but the buffers were
-		 * clean. Someone wrote them back by hand with
-		 * ll_rw_block/submit_bh. A rare case.
-		 */
-		int uptodate = 1;
-		do {
-			if (!buffer_uptodate(bh)) {
-				uptodate = 0;
-				break;
-			}
-			bh = bh->b_this_page;
-		} while (bh != head);
-		if (uptodate)
-			SetPageUptodate(page);
-		end_page_writeback(page);
+	tree = &BTRFS_I(page->mapping->host)->extent_tree;
+	ret = try_release_extent_mapping(tree, page);
+	if (ret == 1) {
+		ClearPagePrivate(page);
+		set_page_private(page, 0);
+		page_cache_release(page);
 	}
-	return err;
-
-recover:
-	/*
-	 * ENOSPC, or some other error. We may already have added some
-	 * blocks to the file, so we need to write these out to avoid
-	 * exposing stale data.
-	 * The page is currently locked and not marked for writeback
-	 */
-	bh = head;
-	/* Recovery: lock and submit the mapped buffers */
-	do {
-		if (buffer_mapped(bh) && buffer_dirty(bh)) {
-			lock_buffer(bh);
-			mark_buffer_async_write(bh);
-		} else {
-			/*
-			 * The buffer may have been set dirty during
-			 * attachment to a dirty page.
-			 */
-			clear_buffer_dirty(bh);
-		}
-	} while ((bh = bh->b_this_page) != head);
-	SetPageError(page);
-	BUG_ON(PageWriteback(page));
-	set_page_writeback(page);
-	do {
-		struct buffer_head *next = bh->b_this_page;
-		if (buffer_async_write(bh)) {
-			clear_buffer_dirty(bh);
-			submit_bh(WRITE, bh);
-			nr_underway++;
-		}
-		bh = next;
-	} while (bh != head);
-	unlock_page(page);
-	goto done;
+	return ret;
 }
 
-static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
+static void btrfs_invalidatepage(struct page *page, unsigned long offset)
 {
-	struct inode * const inode = page->mapping->host;
-	loff_t i_size = i_size_read(inode);
-	const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
-	unsigned offset;
-	void *kaddr;
-
-	/* Is the page fully inside i_size? */
-	if (page->index < end_index)
-		return __btrfs_write_full_page(inode, page, wbc);
-
-	/* Is the page fully outside i_size? (truncate in progress) */
-	offset = i_size & (PAGE_CACHE_SIZE-1);
-	if (page->index >= end_index+1 || !offset) {
-		/*
-		 * The page may have dirty, unmapped buffers. For example,
-		 * they may have been added in ext3_writepage(). Make them
-		 * freeable here, so the page does not leak.
-		 */
-		block_invalidatepage(page, 0);
-		unlock_page(page);
-		return 0; /* don't care */
-	}
+	struct extent_map_tree *tree;
 
-	/*
-	 * The page straddles i_size. It must be zeroed out on each and every
-	 * writepage invokation because it may be mmapped. "A file is mapped
-	 * in multiples of the page size. For a file that is not a multiple of
-	 * the page size, the remaining memory is zeroed when mapped, and
-	 * writes to that region are not written out to the file."
-	 */
-	kaddr = kmap_atomic(page, KM_USER0);
-	memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
-	flush_dcache_page(page);
-	kunmap_atomic(kaddr, KM_USER0);
-	return __btrfs_write_full_page(inode, page, wbc);
+	tree = &BTRFS_I(page->mapping->host)->extent_tree;
+	extent_invalidatepage(tree, page, offset);
+	btrfs_releasepage(page, GFP_NOFS);
 }
 
 /*
@@ -1905,28 +1698,39 @@ static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
 int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page)
 {
 	struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	struct btrfs_trans_handle *trans;
 	unsigned long end;
 	loff_t size;
 	int ret = -EINVAL;
+	u64 page_start;
 
 	lock_page(page);
 	wait_on_page_writeback(page);
 	size = i_size_read(inode);
+	page_start = page->index << PAGE_CACHE_SHIFT;
+
 	if ((page->mapping != inode->i_mapping) ||
-	    ((page->index << PAGE_CACHE_SHIFT) > size)) {
+	    (page_start > size)) {
 		/* page got truncated out from underneath us */
 		goto out_unlock;
 	}
 
 	/* page is wholly or partially inside EOF */
-	if (((page->index + 1) << PAGE_CACHE_SHIFT) > size)
+	if (page_start + PAGE_CACHE_SIZE > size)
 		end = size & ~PAGE_CACHE_MASK;
 	else
 		end = PAGE_CACHE_SIZE;
 
-	ret = btrfs_prepare_write(NULL, page, 0, end);
-	if (!ret)
-		ret = btrfs_commit_write(NULL, page, 0, end);
+	mutex_lock(&root->fs_info->fs_mutex);
+	trans = btrfs_start_transaction(root, 1);
+	ret = btrfs_cow_one_page(trans, inode, page, end);
+	btrfs_end_transaction(trans, root);
+	mutex_unlock(&root->fs_info->fs_mutex);
+	set_extent_dirty(&BTRFS_I(inode)->extent_tree,
+			 page_start, page_start + PAGE_CACHE_SIZE - 1,
+			 GFP_NOFS);
+	set_page_dirty(page);
 
 out_unlock:
 	unlock_page(page);
@@ -1962,21 +1766,8 @@ static void btrfs_truncate(struct inode *inode)
 int btrfs_commit_write(struct file *file, struct page *page,
 		       unsigned from, unsigned to)
 {
-	struct inode *inode = page->mapping->host;
-	struct buffer_head *bh;
-	loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
-
-	SetPageUptodate(page);
-	bh = page_buffers(page);
-	set_buffer_uptodate(bh);
-	if (buffer_mapped(bh) && bh->b_blocknr != 0) {
-		set_page_dirty(page);
-	}
-	if (pos > inode->i_size) {
-		i_size_write(inode, pos);
-		mark_inode_dirty(inode);
-	}
-	return 0;
+	return extent_commit_write(&BTRFS_I(page->mapping->host)->extent_tree,
+				   page->mapping->host, page, from, to);
 }
 
 static int create_subvol(struct btrfs_root *root, char *name, int namelen)
@@ -2471,6 +2262,8 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
 		inode->i_mapping->a_ops = &btrfs_aops;
 		inode->i_fop = &btrfs_file_operations;
 		inode->i_op = &btrfs_file_inode_operations;
+		extent_map_tree_init(&BTRFS_I(inode)->extent_tree,
+				     inode->i_mapping, GFP_NOFS);
 	}
 	dir->i_sb->s_dirt = 1;
 	btrfs_update_inode_block_group(trans, inode);
@@ -2553,6 +2346,9 @@ static struct address_space_operations btrfs_aops = {
 	.prepare_write	= btrfs_prepare_write,
 	.commit_write	= btrfs_commit_write,
 	.bmap		= btrfs_bmap,
+	.invalidatepage	= btrfs_invalidatepage,
+	.releasepage	= btrfs_releasepage,
+	.set_page_dirty	= __set_page_dirty_nobuffers,
 };
 
 static struct address_space_operations btrfs_symlink_aops = {