diff options
Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r-- | fs/btrfs/inode.c | 303 |
1 files changed, 297 insertions, 6 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index e639cc9e089f..def33ac90d77 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -31,6 +31,7 @@ | |||
31 | #include <linux/writeback.h> | 31 | #include <linux/writeback.h> |
32 | #include <linux/statfs.h> | 32 | #include <linux/statfs.h> |
33 | #include <linux/compat.h> | 33 | #include <linux/compat.h> |
34 | #include <linux/bit_spinlock.h> | ||
34 | #include "ctree.h" | 35 | #include "ctree.h" |
35 | #include "disk-io.h" | 36 | #include "disk-io.h" |
36 | #include "transaction.h" | 37 | #include "transaction.h" |
@@ -561,7 +562,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) | |||
561 | goto out; | 562 | goto out; |
562 | 563 | ||
563 | if (!PageUptodate(page)) { | 564 | if (!PageUptodate(page)) { |
564 | ret = mpage_readpage(page, btrfs_get_block); | 565 | ret = btrfs_readpage(NULL, page); |
565 | lock_page(page); | 566 | lock_page(page); |
566 | if (!PageUptodate(page)) { | 567 | if (!PageUptodate(page)) { |
567 | ret = -EIO; | 568 | ret = -EIO; |
@@ -1257,8 +1258,10 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, | |||
1257 | path = btrfs_alloc_path(); | 1258 | path = btrfs_alloc_path(); |
1258 | BUG_ON(!path); | 1259 | BUG_ON(!path); |
1259 | if (create & BTRFS_GET_BLOCK_CREATE) { | 1260 | if (create & BTRFS_GET_BLOCK_CREATE) { |
1260 | WARN_ON(1); | 1261 | /* |
1261 | /* this almost but not quite works */ | 1262 | * danger!, this only works if the page is properly up |
1263 | * to date somehow | ||
1264 | */ | ||
1262 | trans = btrfs_start_transaction(root, 1); | 1265 | trans = btrfs_start_transaction(root, 1); |
1263 | if (!trans) { | 1266 | if (!trans) { |
1264 | err = -ENOMEM; | 1267 | err = -ENOMEM; |
@@ -1353,7 +1356,6 @@ not_found: | |||
1353 | ins.objectid, ins.offset, | 1356 | ins.objectid, ins.offset, |
1354 | ins.offset); | 1357 | ins.offset); |
1355 | BUG_ON(ret); | 1358 | BUG_ON(ret); |
1356 | SetPageChecked(result->b_page); | ||
1357 | btrfs_map_bh_to_logical(root, result, ins.objectid); | 1359 | btrfs_map_bh_to_logical(root, result, ins.objectid); |
1358 | } | 1360 | } |
1359 | out: | 1361 | out: |
@@ -1374,6 +1376,40 @@ int btrfs_get_block(struct inode *inode, sector_t iblock, | |||
1374 | return err; | 1376 | return err; |
1375 | } | 1377 | } |
1376 | 1378 | ||
1379 | int btrfs_get_block_csum(struct inode *inode, sector_t iblock, | ||
1380 | struct buffer_head *result, int create) | ||
1381 | { | ||
1382 | int ret; | ||
1383 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
1384 | struct page *page = result->b_page; | ||
1385 | u64 offset = (page->index << PAGE_CACHE_SHIFT) + bh_offset(result); | ||
1386 | struct btrfs_csum_item *item; | ||
1387 | struct btrfs_path *path = NULL; | ||
1388 | |||
1389 | mutex_lock(&root->fs_info->fs_mutex); | ||
1390 | ret = btrfs_get_block_lock(inode, iblock, result, create); | ||
1391 | if (ret) | ||
1392 | goto out; | ||
1393 | |||
1394 | path = btrfs_alloc_path(); | ||
1395 | item = btrfs_lookup_csum(NULL, root, path, inode->i_ino, offset, 0); | ||
1396 | if (IS_ERR(item)) { | ||
1397 | ret = PTR_ERR(item); | ||
1398 | /* a csum that isn't present is a preallocated region. */ | ||
1399 | if (ret == -ENOENT || ret == -EFBIG) | ||
1400 | ret = 0; | ||
1401 | result->b_private = 0; | ||
1402 | goto out; | ||
1403 | } | ||
1404 | memcpy((char *)&result->b_private, &item->csum, BTRFS_CRC32_SIZE); | ||
1405 | printk("get_block_sum file %lu offset %llu csum %X\n", inode->i_ino, (unsigned long long)offset, *(int *)(&item->csum)); | ||
1406 | out: | ||
1407 | if (path) | ||
1408 | btrfs_free_path(path); | ||
1409 | mutex_unlock(&root->fs_info->fs_mutex); | ||
1410 | return ret; | ||
1411 | } | ||
1412 | |||
1377 | static int btrfs_get_block_bmap(struct inode *inode, sector_t iblock, | 1413 | static int btrfs_get_block_bmap(struct inode *inode, sector_t iblock, |
1378 | struct buffer_head *result, int create) | 1414 | struct buffer_head *result, int create) |
1379 | { | 1415 | { |
@@ -1395,9 +1431,198 @@ static int btrfs_prepare_write(struct file *file, struct page *page, | |||
1395 | return block_prepare_write(page, from, to, btrfs_get_block); | 1431 | return block_prepare_write(page, from, to, btrfs_get_block); |
1396 | } | 1432 | } |
1397 | 1433 | ||
1398 | static int btrfs_readpage(struct file *file, struct page *page) | 1434 | static void buffer_io_error(struct buffer_head *bh) |
1435 | { | ||
1436 | char b[BDEVNAME_SIZE]; | ||
1437 | |||
1438 | printk(KERN_ERR "Buffer I/O error on device %s, logical block %Lu\n", | ||
1439 | bdevname(bh->b_bdev, b), | ||
1440 | (unsigned long long)bh->b_blocknr); | ||
1441 | } | ||
1442 | |||
1443 | /* | ||
1444 | * I/O completion handler for block_read_full_page() - pages | ||
1445 | * which come unlocked at the end of I/O. | ||
1446 | */ | ||
1447 | static void btrfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) | ||
1448 | { | ||
1449 | unsigned long flags; | ||
1450 | struct buffer_head *first; | ||
1451 | struct buffer_head *tmp; | ||
1452 | struct page *page; | ||
1453 | int page_uptodate = 1; | ||
1454 | struct inode *inode; | ||
1455 | int ret; | ||
1456 | |||
1457 | BUG_ON(!buffer_async_read(bh)); | ||
1458 | |||
1459 | page = bh->b_page; | ||
1460 | inode = page->mapping->host; | ||
1461 | if (uptodate) { | ||
1462 | void *kaddr; | ||
1463 | struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; | ||
1464 | if (bh->b_private) { | ||
1465 | char csum[BTRFS_CRC32_SIZE]; | ||
1466 | kaddr = kmap_atomic(page, KM_IRQ0); | ||
1467 | ret = btrfs_csum_data(root, kaddr + bh_offset(bh), | ||
1468 | bh->b_size, csum); | ||
1469 | BUG_ON(ret); | ||
1470 | if (memcmp(csum, &bh->b_private, BTRFS_CRC32_SIZE)) { | ||
1471 | u64 offset; | ||
1472 | offset = (page->index << PAGE_CACHE_SHIFT) + | ||
1473 | bh_offset(bh); | ||
1474 | printk("btrfs csum failed ino %lu off %llu\n", | ||
1475 | page->mapping->host->i_ino, | ||
1476 | (unsigned long long)offset); | ||
1477 | memset(kaddr + bh_offset(bh), 1, bh->b_size); | ||
1478 | flush_dcache_page(page); | ||
1479 | printk("bad verify file %lu offset %llu bh_private %lX csum %X\n", inode->i_ino, (unsigned long long)offset, (unsigned long)(bh->b_private), *(int *)csum); | ||
1480 | } | ||
1481 | kunmap_atomic(kaddr, KM_IRQ0); | ||
1482 | } | ||
1483 | set_buffer_uptodate(bh); | ||
1484 | } else { | ||
1485 | clear_buffer_uptodate(bh); | ||
1486 | if (printk_ratelimit()) | ||
1487 | buffer_io_error(bh); | ||
1488 | SetPageError(page); | ||
1489 | } | ||
1490 | |||
1491 | /* | ||
1492 | * Be _very_ careful from here on. Bad things can happen if | ||
1493 | * two buffer heads end IO at almost the same time and both | ||
1494 | * decide that the page is now completely done. | ||
1495 | */ | ||
1496 | first = page_buffers(page); | ||
1497 | local_irq_save(flags); | ||
1498 | bit_spin_lock(BH_Uptodate_Lock, &first->b_state); | ||
1499 | clear_buffer_async_read(bh); | ||
1500 | unlock_buffer(bh); | ||
1501 | tmp = bh; | ||
1502 | do { | ||
1503 | if (!buffer_uptodate(tmp)) | ||
1504 | page_uptodate = 0; | ||
1505 | if (buffer_async_read(tmp)) { | ||
1506 | BUG_ON(!buffer_locked(tmp)); | ||
1507 | goto still_busy; | ||
1508 | } | ||
1509 | tmp = tmp->b_this_page; | ||
1510 | } while (tmp != bh); | ||
1511 | bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); | ||
1512 | local_irq_restore(flags); | ||
1513 | |||
1514 | /* | ||
1515 | * If none of the buffers had errors and they are all | ||
1516 | * uptodate then we can set the page uptodate. | ||
1517 | */ | ||
1518 | if (page_uptodate && !PageError(page)) | ||
1519 | SetPageUptodate(page); | ||
1520 | unlock_page(page); | ||
1521 | return; | ||
1522 | |||
1523 | still_busy: | ||
1524 | bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); | ||
1525 | local_irq_restore(flags); | ||
1526 | return; | ||
1527 | } | ||
1528 | |||
1529 | /* | ||
1530 | * Generic "read page" function for block devices that have the normal | ||
1531 | * get_block functionality. This is most of the block device filesystems. | ||
1532 | * Reads the page asynchronously --- the unlock_buffer() and | ||
1533 | * set/clear_buffer_uptodate() functions propagate buffer state into the | ||
1534 | * page struct once IO has completed. | ||
1535 | */ | ||
1536 | int btrfs_readpage(struct file *file, struct page *page) | ||
1399 | { | 1537 | { |
1400 | return mpage_readpage(page, btrfs_get_block); | 1538 | struct inode *inode = page->mapping->host; |
1539 | sector_t iblock, lblock; | ||
1540 | struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE]; | ||
1541 | unsigned int blocksize; | ||
1542 | int nr, i; | ||
1543 | int fully_mapped = 1; | ||
1544 | |||
1545 | BUG_ON(!PageLocked(page)); | ||
1546 | blocksize = 1 << inode->i_blkbits; | ||
1547 | if (!page_has_buffers(page)) | ||
1548 | create_empty_buffers(page, blocksize, 0); | ||
1549 | head = page_buffers(page); | ||
1550 | |||
1551 | iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
1552 | lblock = (i_size_read(inode)+blocksize-1) >> inode->i_blkbits; | ||
1553 | bh = head; | ||
1554 | nr = 0; | ||
1555 | i = 0; | ||
1556 | |||
1557 | do { | ||
1558 | if (buffer_uptodate(bh)) | ||
1559 | continue; | ||
1560 | |||
1561 | if (!buffer_mapped(bh)) { | ||
1562 | int err = 0; | ||
1563 | |||
1564 | fully_mapped = 0; | ||
1565 | if (iblock < lblock) { | ||
1566 | WARN_ON(bh->b_size != blocksize); | ||
1567 | err = btrfs_get_block_csum(inode, iblock, | ||
1568 | bh, 0); | ||
1569 | if (err) | ||
1570 | SetPageError(page); | ||
1571 | } | ||
1572 | if (!buffer_mapped(bh)) { | ||
1573 | void *kaddr = kmap_atomic(page, KM_USER0); | ||
1574 | memset(kaddr + i * blocksize, 0, blocksize); | ||
1575 | flush_dcache_page(page); | ||
1576 | kunmap_atomic(kaddr, KM_USER0); | ||
1577 | if (!err) | ||
1578 | set_buffer_uptodate(bh); | ||
1579 | continue; | ||
1580 | } | ||
1581 | /* | ||
1582 | * get_block() might have updated the buffer | ||
1583 | * synchronously | ||
1584 | */ | ||
1585 | if (buffer_uptodate(bh)) | ||
1586 | continue; | ||
1587 | } | ||
1588 | arr[nr++] = bh; | ||
1589 | } while (i++, iblock++, (bh = bh->b_this_page) != head); | ||
1590 | |||
1591 | if (fully_mapped) | ||
1592 | SetPageMappedToDisk(page); | ||
1593 | |||
1594 | if (!nr) { | ||
1595 | /* | ||
1596 | * All buffers are uptodate - we can set the page uptodate | ||
1597 | * as well. But not if get_block() returned an error. | ||
1598 | */ | ||
1599 | if (!PageError(page)) | ||
1600 | SetPageUptodate(page); | ||
1601 | unlock_page(page); | ||
1602 | return 0; | ||
1603 | } | ||
1604 | |||
1605 | /* Stage two: lock the buffers */ | ||
1606 | for (i = 0; i < nr; i++) { | ||
1607 | bh = arr[i]; | ||
1608 | lock_buffer(bh); | ||
1609 | bh->b_end_io = btrfs_end_buffer_async_read; | ||
1610 | set_buffer_async_read(bh); | ||
1611 | } | ||
1612 | |||
1613 | /* | ||
1614 | * Stage 3: start the IO. Check for uptodateness | ||
1615 | * inside the buffer lock in case another process reading | ||
1616 | * the underlying blockdev brought it uptodate (the sct fix). | ||
1617 | */ | ||
1618 | for (i = 0; i < nr; i++) { | ||
1619 | bh = arr[i]; | ||
1620 | if (buffer_uptodate(bh)) | ||
1621 | btrfs_end_buffer_async_read(bh, 1); | ||
1622 | else | ||
1623 | submit_bh(READ, bh); | ||
1624 | } | ||
1625 | return 0; | ||
1401 | } | 1626 | } |
1402 | 1627 | ||
1403 | /* | 1628 | /* |
@@ -1424,6 +1649,7 @@ static int __btrfs_write_full_page(struct inode *inode, struct page *page, | |||
1424 | struct buffer_head *bh, *head; | 1649 | struct buffer_head *bh, *head; |
1425 | const unsigned blocksize = 1 << inode->i_blkbits; | 1650 | const unsigned blocksize = 1 << inode->i_blkbits; |
1426 | int nr_underway = 0; | 1651 | int nr_underway = 0; |
1652 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
1427 | 1653 | ||
1428 | BUG_ON(!PageLocked(page)); | 1654 | BUG_ON(!PageLocked(page)); |
1429 | 1655 | ||
@@ -1496,6 +1722,24 @@ static int __btrfs_write_full_page(struct inode *inode, struct page *page, | |||
1496 | continue; | 1722 | continue; |
1497 | } | 1723 | } |
1498 | if (test_clear_buffer_dirty(bh) && bh->b_blocknr != 0) { | 1724 | if (test_clear_buffer_dirty(bh) && bh->b_blocknr != 0) { |
1725 | struct btrfs_trans_handle *trans; | ||
1726 | int ret; | ||
1727 | u64 off = page->index << PAGE_CACHE_SHIFT; | ||
1728 | char *kaddr; | ||
1729 | |||
1730 | off += bh_offset(bh); | ||
1731 | mutex_lock(&root->fs_info->fs_mutex); | ||
1732 | trans = btrfs_start_transaction(root, 1); | ||
1733 | btrfs_set_trans_block_group(trans, inode); | ||
1734 | kaddr = kmap(page); | ||
1735 | ret = btrfs_csum_file_block(trans, root, inode->i_ino, | ||
1736 | off, kaddr + bh_offset(bh), | ||
1737 | bh->b_size); | ||
1738 | kunmap(page); | ||
1739 | BUG_ON(ret); | ||
1740 | ret = btrfs_end_transaction(trans, root); | ||
1741 | BUG_ON(ret); | ||
1742 | mutex_unlock(&root->fs_info->fs_mutex); | ||
1499 | mark_buffer_async_write(bh); | 1743 | mark_buffer_async_write(bh); |
1500 | } else { | 1744 | } else { |
1501 | unlock_buffer(bh); | 1745 | unlock_buffer(bh); |
@@ -1617,6 +1861,53 @@ static int btrfs_writepage(struct page *page, struct writeback_control *wbc) | |||
1617 | return __btrfs_write_full_page(inode, page, wbc); | 1861 | return __btrfs_write_full_page(inode, page, wbc); |
1618 | } | 1862 | } |
1619 | 1863 | ||
1864 | /* | ||
1865 | * btrfs_page_mkwrite() is not allowed to change the file size as it gets | ||
1866 | * called from a page fault handler when a page is first dirtied. Hence we must | ||
1867 | * be careful to check for EOF conditions here. We set the page up correctly | ||
1868 | * for a written page which means we get ENOSPC checking when writing into | ||
1869 | * holes and correct delalloc and unwritten extent mapping on filesystems that | ||
1870 | * support these features. | ||
1871 | * | ||
1872 | * We are not allowed to take the i_mutex here so we have to play games to | ||
1873 | * protect against truncate races as the page could now be beyond EOF. Because | ||
1874 | * vmtruncate() writes the inode size before removing pages, once we have the | ||
1875 | * page lock we can determine safely if the page is beyond EOF. If it is not | ||
1876 | * beyond EOF, then the page is guaranteed safe against truncation until we | ||
1877 | * unlock the page. | ||
1878 | */ | ||
1879 | int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page) | ||
1880 | { | ||
1881 | struct inode *inode = vma->vm_file->f_path.dentry->d_inode; | ||
1882 | unsigned long end; | ||
1883 | loff_t size; | ||
1884 | int ret = -EINVAL; | ||
1885 | |||
1886 | lock_page(page); | ||
1887 | wait_on_page_writeback(page); | ||
1888 | printk("btrfs_page_mkwrite %lu %lu\n", page->mapping->host->i_ino, page->index); | ||
1889 | size = i_size_read(inode); | ||
1890 | if ((page->mapping != inode->i_mapping) || | ||
1891 | ((page->index << PAGE_CACHE_SHIFT) > size)) { | ||
1892 | /* page got truncated out from underneath us */ | ||
1893 | goto out_unlock; | ||
1894 | } | ||
1895 | |||
1896 | /* page is wholly or partially inside EOF */ | ||
1897 | if (((page->index + 1) << PAGE_CACHE_SHIFT) > size) | ||
1898 | end = size & ~PAGE_CACHE_MASK; | ||
1899 | else | ||
1900 | end = PAGE_CACHE_SIZE; | ||
1901 | |||
1902 | ret = btrfs_prepare_write(NULL, page, 0, end); | ||
1903 | if (!ret) | ||
1904 | ret = btrfs_commit_write(NULL, page, 0, end); | ||
1905 | |||
1906 | out_unlock: | ||
1907 | unlock_page(page); | ||
1908 | return ret; | ||
1909 | } | ||
1910 | |||
1620 | static void btrfs_truncate(struct inode *inode) | 1911 | static void btrfs_truncate(struct inode *inode) |
1621 | { | 1912 | { |
1622 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1913 | struct btrfs_root *root = BTRFS_I(inode)->root; |