Diffstat (limited to 'fs/buffer.c')

-rw-r--r--	fs/buffer.c	145
1 file changed, 113 insertions, 32 deletions
diff --git a/fs/buffer.c b/fs/buffer.c
index b5f044283edb..ec0aca8ba6bf 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -911,6 +911,18 @@ link_dev_buffers(struct page *page, struct buffer_head *head)
 	attach_page_buffers(page, head);
 }
 
+static sector_t blkdev_max_block(struct block_device *bdev, unsigned int size)
+{
+	sector_t retval = ~((sector_t)0);
+	loff_t sz = i_size_read(bdev->bd_inode);
+
+	if (sz) {
+		unsigned int sizebits = blksize_bits(size);
+		retval = (sz >> sizebits);
+	}
+	return retval;
+}
+
 /*
  * Initialise the state of a blockdev page's buffers.
  */
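For illustration only (not part of the patch): blkdev_max_block() turns the device size into a count of addressable blocks for a given block size. Below is a minimal userspace sketch of the same arithmetic, with made-up device and block sizes, and __builtin_ctz standing in for the kernel's blksize_bits() on a power-of-two size:

	#include <stdint.h>
	#include <stdio.h>

	/* Userspace sketch of the blkdev_max_block() arithmetic. */
	static uint64_t max_block(uint64_t dev_bytes, unsigned int blocksize)
	{
		uint64_t retval = ~(uint64_t)0;	/* "no limit" when the size is unknown */

		if (dev_bytes) {
			/* blksize_bits() analogue for a power-of-two blocksize */
			unsigned int sizebits = __builtin_ctz(blocksize);
			retval = dev_bytes >> sizebits;
		}
		return retval;
	}

	int main(void)
	{
		/* hypothetical 10 GiB device, 4096-byte blocks -> 2621440 blocks */
		printf("%llu\n", (unsigned long long)max_block(10ULL << 30, 4096));
		return 0;
	}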
@@ -921,7 +933,7 @@ init_page_buffers(struct page *page, struct block_device *bdev,
 	struct buffer_head *head = page_buffers(page);
 	struct buffer_head *bh = head;
 	int uptodate = PageUptodate(page);
-	sector_t end_block = blkdev_max_block(I_BDEV(bdev->bd_inode));
+	sector_t end_block = blkdev_max_block(I_BDEV(bdev->bd_inode), size);
 
 	do {
 		if (!buffer_mapped(bh)) {
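Passing `size` through matters because the count of valid blocks depends on the block size the buffers actually use. A userspace sketch with an assumed 10001-sector (5120512-byte) device:

	#include <stdint.h>
	#include <stdio.h>

	/* The same device holds 1250 blocks of 4096 bytes but 10001 blocks
	 * of 512 bytes, so the "last valid block" cutoff must be computed
	 * with the size init_page_buffers() is attaching. */
	int main(void)
	{
		uint64_t dev_bytes = 10001ULL << 9;

		printf("end_block@4096=%llu end_block@512=%llu\n",
		       (unsigned long long)(dev_bytes >> 12),
		       (unsigned long long)(dev_bytes >> 9));
		return 0;
	}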
@@ -1553,6 +1565,28 @@ void unmap_underlying_metadata(struct block_device *bdev, sector_t block)
 EXPORT_SYMBOL(unmap_underlying_metadata);
 
 /*
+ * Size is a power-of-two in the range 512..PAGE_SIZE,
+ * and the case we care about most is PAGE_SIZE.
+ *
+ * So this *could* possibly be written with those
+ * constraints in mind (relevant mostly if some
+ * architecture has a slow bit-scan instruction)
+ */
+static inline int block_size_bits(unsigned int blocksize)
+{
+	return ilog2(blocksize);
+}
+
+static struct buffer_head *create_page_buffers(struct page *page, struct inode *inode, unsigned int b_state)
+{
+	BUG_ON(!PageLocked(page));
+
+	if (!page_has_buffers(page))
+		create_empty_buffers(page, 1 << ACCESS_ONCE(inode->i_blkbits), b_state);
+	return page_buffers(page);
+}
+
+/*
  * NOTE! All mapped/uptodate combinations are valid:
 *
 *	Mapped	Uptodate	Meaning
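create_page_buffers() reads i_blkbits through ACCESS_ONCE() because the device block size can change underneath us (presumably via a concurrent set_blocksize()); once the buffers exist, each page carries its own size in bh->b_size. A userspace sketch (not from the patch) of the read-once idea, with a volatile cast standing in for ACCESS_ONCE():

	#include <stdio.h>

	/* If another thread can change 'blkbits' concurrently, two separate
	 * reads may disagree; taking one snapshot keeps the derived size and
	 * the shift consistent with each other. */
	#define READ_ONCE_U(x) (*(volatile unsigned *)&(x))

	static unsigned blkbits = 12;	/* hypothetical shared field, like inode->i_blkbits */

	int main(void)
	{
		unsigned bits = READ_ONCE_U(blkbits);	/* single snapshot */
		unsigned size = 1u << bits;		/* derived from that same snapshot */

		printf("blocksize=%u bits=%u\n", size, bits);
		return 0;
	}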
@@ -1589,19 +1623,13 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
 	sector_t block;
 	sector_t last_block;
 	struct buffer_head *bh, *head;
-	const unsigned blocksize = 1 << inode->i_blkbits;
+	unsigned int blocksize, bbits;
 	int nr_underway = 0;
 	int write_op = (wbc->sync_mode == WB_SYNC_ALL ?
 			WRITE_SYNC : WRITE);
 
-	BUG_ON(!PageLocked(page));
-
-	last_block = (i_size_read(inode) - 1) >> inode->i_blkbits;
-
-	if (!page_has_buffers(page)) {
-		create_empty_buffers(page, blocksize,
+	head = create_page_buffers(page, inode,
 				(1 << BH_Dirty)|(1 << BH_Uptodate));
-	}
 
 	/*
 	 * Be very careful. We have no exclusion from __set_page_dirty_buffers
@@ -1613,9 +1641,12 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
 	 * handle that here by just cleaning them.
 	 */
 
-	block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
-	head = page_buffers(page);
 	bh = head;
+	blocksize = bh->b_size;
+	bbits = block_size_bits(blocksize);
+
+	block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
+	last_block = (i_size_read(inode) - 1) >> bbits;
 
 	/*
 	 * Get all the dirty buffers mapped to disk addresses and
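The index arithmetic now derives everything from bh->b_size instead of inode->i_blkbits. A standalone sketch of the same computation with assumed values (4 KiB pages, so PAGE_CACHE_SHIFT = 12, and 1 KiB blocks, so bbits = 10):

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		unsigned page_shift = 12, bbits = 10;
		uint64_t page_index = 3, i_size = 10000;	/* made-up values */

		uint64_t block = page_index << (page_shift - bbits);	/* first block of page: 12 */
		uint64_t last_block = (i_size - 1) >> bbits;		/* last valid block: 9 */

		printf("block=%llu last_block=%llu\n",
		       (unsigned long long)block, (unsigned long long)last_block);
		return 0;
	}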
@@ -1806,12 +1837,10 @@ int __block_write_begin(struct page *page, loff_t pos, unsigned len,
 	BUG_ON(to > PAGE_CACHE_SIZE);
 	BUG_ON(from > to);
 
-	blocksize = 1 << inode->i_blkbits;
-	if (!page_has_buffers(page))
-		create_empty_buffers(page, blocksize, 0);
-	head = page_buffers(page);
+	head = create_page_buffers(page, inode, 0);
+	blocksize = head->b_size;
+	bbits = block_size_bits(blocksize);
 
-	bbits = inode->i_blkbits;
 	block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
 
 	for(bh = head, block_start = 0; bh != head || !block_start;
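For context (an assumption, since the lines sit above this hunk): __block_write_begin()'s `from`/`to` are the byte range of the write within the page, derived from `pos` and `len`. A sketch of that mapping with made-up numbers, assuming 4 KiB pages:

	#include <stdio.h>

	int main(void)
	{
		unsigned long long pos = 10000;	/* hypothetical file offset */
		unsigned len = 300;

		unsigned from = pos & 4095;	/* offset within the page: 1808 */
		unsigned to = from + len;	/* 2108; must stay <= 4096 */

		printf("page=%llu from=%u to=%u\n", pos >> 12, from, to);
		return 0;
	}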
@@ -1881,11 +1910,11 @@ static int __block_commit_write(struct inode *inode, struct page *page,
 	unsigned blocksize;
 	struct buffer_head *bh, *head;
 
-	blocksize = 1 << inode->i_blkbits;
+	bh = head = page_buffers(page);
+	blocksize = bh->b_size;
 
-	for(bh = head = page_buffers(page), block_start = 0;
-	    bh != head || !block_start;
-	    block_start=block_end, bh = bh->b_this_page) {
+	block_start = 0;
+	do {
 		block_end = block_start + blocksize;
 		if (block_end <= from || block_start >= to) {
 			if (!buffer_uptodate(bh))
@@ -1895,7 +1924,10 @@ static int __block_commit_write(struct inode *inode, struct page *page,
 			mark_buffer_dirty(bh);
 		}
 		clear_buffer_new(bh);
-	}
+
+		block_start = block_end;
+		bh = bh->b_this_page;
+	} while (bh != head);
 
 	/*
 	 * If this is a partial write which happened to make all buffers
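The rewritten loop walks the page's buffers with do/while: b_this_page links a page's buffer heads into a circular singly-linked ring, so iteration stops when it wraps back to head, visiting each buffer exactly once. A minimal sketch of that traversal pattern:

	#include <stdio.h>

	/* Toy stand-in for struct buffer_head and its b_this_page link. */
	struct bh { int id; struct bh *next; };

	int main(void)
	{
		struct bh c = { 2, 0 }, b = { 1, &c }, a = { 0, &b };
		struct bh *head = &a, *cur = head;

		c.next = head;		/* close the ring */
		do {
			printf("buffer %d\n", cur->id);
			cur = cur->next;
		} while (cur != head);	/* stop when we wrap back to head */
		return 0;
	}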
@@ -2020,7 +2052,6 @@ EXPORT_SYMBOL(generic_write_end);
 int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc,
 					unsigned long from)
 {
-	struct inode *inode = page->mapping->host;
 	unsigned block_start, block_end, blocksize;
 	unsigned to;
 	struct buffer_head *bh, *head;
@@ -2029,13 +2060,13 @@ int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc,
 	if (!page_has_buffers(page))
 		return 0;
 
-	blocksize = 1 << inode->i_blkbits;
+	head = page_buffers(page);
+	blocksize = head->b_size;
 	to = min_t(unsigned, PAGE_CACHE_SIZE - from, desc->count);
 	to = from + to;
 	if (from < blocksize && to > PAGE_CACHE_SIZE - blocksize)
 		return 0;
 
-	head = page_buffers(page);
 	bh = head;
 	block_start = 0;
 	do {
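Worked numbers for the early-exit test, under assumed values (4 KiB page, 1 KiB blocks): a range that touches both the first and the last block of the page is reported as not partially uptodate without walking the buffers, while a small interior range proceeds to the per-buffer check.

	#include <stdio.h>

	int main(void)
	{
		unsigned page_size = 4096, blocksize = 1024;
		unsigned from = 1500, count = 300;	/* hypothetical read */

		/* min_t(unsigned, PAGE_CACHE_SIZE - from, desc->count) analogue */
		unsigned to = from + (count < page_size - from ? count : page_size - from);

		if (from < blocksize && to > page_size - blocksize)
			printf("range spans the whole page: report not uptodate\n");
		else
			printf("walk buffers covering [%u, %u)\n", from, to);
		return 0;
	}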
@@ -2068,18 +2099,16 @@ int block_read_full_page(struct page *page, get_block_t *get_block)
 	struct inode *inode = page->mapping->host;
 	sector_t iblock, lblock;
 	struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
-	unsigned int blocksize;
+	unsigned int blocksize, bbits;
 	int nr, i;
 	int fully_mapped = 1;
 
-	BUG_ON(!PageLocked(page));
-	blocksize = 1 << inode->i_blkbits;
-	if (!page_has_buffers(page))
-		create_empty_buffers(page, blocksize, 0);
-	head = page_buffers(page);
+	head = create_page_buffers(page, inode, 0);
+	blocksize = head->b_size;
+	bbits = block_size_bits(blocksize);
 
-	iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
-	lblock = (i_size_read(inode)+blocksize-1) >> inode->i_blkbits;
+	iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
+	lblock = (i_size_read(inode)+blocksize-1) >> bbits;
 	bh = head;
 	nr = 0;
 	i = 0;
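The lblock computation rounds the file size up to whole blocks: (size + blocksize - 1) >> bbits is division rounding up. A sketch with assumed values (4096-byte blocks, a 10000-byte file):

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint64_t i_size = 10000;
		unsigned blocksize = 4096, bbits = 12;

		/* 10000 bytes need 3 blocks: (10000 + 4095) >> 12 == 3 */
		uint64_t lblock = (i_size + blocksize - 1) >> bbits;

		printf("lblock=%llu\n", (unsigned long long)lblock);
		return 0;
	}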
@@ -2864,6 +2893,55 @@ static void end_bio_bh_io_sync(struct bio *bio, int err)
 	bio_put(bio);
 }
 
+/*
+ * This allows us to do IO even on the odd last sectors
+ * of a device, even if the bh block size is some multiple
+ * of the physical sector size.
+ *
+ * We'll just truncate the bio to the size of the device,
+ * and clear the end of the buffer head manually.
+ *
+ * Truly out-of-range accesses will turn into actual IO
+ * errors, this only handles the "we need to be able to
+ * do IO at the final sector" case.
+ */
+static void guard_bh_eod(int rw, struct bio *bio, struct buffer_head *bh)
+{
+	sector_t maxsector;
+	unsigned bytes;
+
+	maxsector = i_size_read(bio->bi_bdev->bd_inode) >> 9;
+	if (!maxsector)
+		return;
+
+	/*
+	 * If the *whole* IO is past the end of the device,
+	 * let it through, and the IO layer will turn it into
+	 * an EIO.
+	 */
+	if (unlikely(bio->bi_sector >= maxsector))
+		return;
+
+	maxsector -= bio->bi_sector;
+	bytes = bio->bi_size;
+	if (likely((bytes >> 9) <= maxsector))
+		return;
+
+	/* Uhhuh. We've got a bh that straddles the device size! */
+	bytes = maxsector << 9;
+
+	/* Truncate the bio.. */
+	bio->bi_size = bytes;
+	bio->bi_io_vec[0].bv_len = bytes;
+
+	/* ..and clear the end of the buffer for reads */
+	if ((rw & RW_MASK) == READ) {
+		void *kaddr = kmap_atomic(bh->b_page);
+		memset(kaddr + bh_offset(bh) + bytes, 0, bh->b_size - bytes);
+		kunmap_atomic(kaddr);
+	}
+}
+
 int submit_bh(int rw, struct buffer_head * bh)
 {
 	struct bio *bio;
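A worked example of the guard's truncation arithmetic (all numbers assumed): a device of 10001 512-byte sectors and a 4096-byte buffer head whose IO starts at sector 9996, i.e. 5 sectors before the end of the device:

	#include <stdio.h>

	int main(void)
	{
		unsigned long long maxsector = 10001;	/* device size >> 9 */
		unsigned long long bi_sector = 9996;	/* start of the IO */
		unsigned bi_size = 4096;		/* bh->b_size */

		if (bi_sector >= maxsector)
			return 0;	/* whole IO past EOD: let the block layer EIO it */

		maxsector -= bi_sector;			/* 5 sectors remain */
		if ((bi_size >> 9) > maxsector) {	/* 8 sectors wanted > 5 left */
			unsigned bytes = maxsector << 9;	/* truncate to 2560 bytes */
			printf("truncate bio to %u bytes, zero last %u bytes on read\n",
			       bytes, bi_size - bytes);	/* zero 1536 bytes */
		}
		return 0;
	}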
@@ -2900,6 +2978,9 @@ int submit_bh(int rw, struct buffer_head * bh)
 	bio->bi_end_io = end_bio_bh_io_sync;
 	bio->bi_private = bh;
 
+	/* Take care of bh's that straddle the end of the device */
+	guard_bh_eod(rw, bio, bh);
+
 	bio_get(bio);
 	submit_bio(rw, bio);
 