Diffstat (limited to 'fs/buffer.c')
-rw-r--r--	fs/buffer.c	145
1 file changed, 113 insertions(+), 32 deletions(-)
diff --git a/fs/buffer.c b/fs/buffer.c
index b5f044283edb..ec0aca8ba6bf 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -911,6 +911,18 @@ link_dev_buffers(struct page *page, struct buffer_head *head)
 	attach_page_buffers(page, head);
 }
 
+static sector_t blkdev_max_block(struct block_device *bdev, unsigned int size)
+{
+	sector_t retval = ~((sector_t)0);
+	loff_t sz = i_size_read(bdev->bd_inode);
+
+	if (sz) {
+		unsigned int sizebits = blksize_bits(size);
+		retval = (sz >> sizebits);
+	}
+	return retval;
+}
+
 /*
  * Initialise the state of a blockdev page's buffers.
  */
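
As a side note on the arithmetic in the new blkdev_max_block() above: a minimal userspace sketch follows, assuming hypothetical names (max_block, dev_bytes) and __builtin_ctz as a stand-in for blksize_bits(); it is an illustration of the size-to-block math, not kernel code.

/* Hypothetical userspace sketch (not kernel code) of the same arithmetic:
 * with a known device size, the first block past the end is the size
 * shifted down by log2(blocksize); with an unknown size, "no limit". */
#include <stdint.h>
#include <stdio.h>

static uint64_t max_block(uint64_t dev_bytes, unsigned int blocksize)
{
	uint64_t retval = ~(uint64_t)0;	/* "no limit" when the size is unknown */

	if (dev_bytes) {
		unsigned int sizebits = __builtin_ctz(blocksize);	/* log2 of a power of two */
		retval = dev_bytes >> sizebits;
	}
	return retval;
}

int main(void)
{
	/* e.g. a 10-sector (5120-byte) device with 4096-byte blocks: only block 0 fits */
	printf("%llu\n", (unsigned long long)max_block(5120, 4096));	/* prints 1 */
	return 0;
}
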
@@ -921,7 +933,7 @@ init_page_buffers(struct page *page, struct block_device *bdev,
 	struct buffer_head *head = page_buffers(page);
 	struct buffer_head *bh = head;
 	int uptodate = PageUptodate(page);
-	sector_t end_block = blkdev_max_block(I_BDEV(bdev->bd_inode));
+	sector_t end_block = blkdev_max_block(I_BDEV(bdev->bd_inode), size);
 
 	do {
 		if (!buffer_mapped(bh)) {
@@ -1553,6 +1565,28 @@ void unmap_underlying_metadata(struct block_device *bdev, sector_t block)
 EXPORT_SYMBOL(unmap_underlying_metadata);
 
 /*
+ * Size is a power-of-two in the range 512..PAGE_SIZE,
+ * and the case we care about most is PAGE_SIZE.
+ *
+ * So this *could* possibly be written with those
+ * constraints in mind (relevant mostly if some
+ * architecture has a slow bit-scan instruction)
+ */
+static inline int block_size_bits(unsigned int blocksize)
+{
+	return ilog2(blocksize);
+}
+
+static struct buffer_head *create_page_buffers(struct page *page, struct inode *inode, unsigned int b_state)
+{
+	BUG_ON(!PageLocked(page));
+
+	if (!page_has_buffers(page))
+		create_empty_buffers(page, 1 << ACCESS_ONCE(inode->i_blkbits), b_state);
+	return page_buffers(page);
+}
+
+/*
  * NOTE! All mapped/uptodate combinations are valid:
  *
  *	Mapped	Uptodate	Meaning
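
The block_size_bits() helper added above is just ilog2() on a power-of-two block size. A small userspace sketch follows to show the blocksize-to-shift mapping the rest of the patch relies on for "bbits"; the __builtin_clz-based helper and the assert checks are illustrative assumptions, not kernel code.

/* Hypothetical userspace check (not kernel code): for power-of-two block
 * sizes in the 512..4096 range, ilog2() is simply the shift count. */
#include <assert.h>

static int block_size_bits(unsigned int blocksize)
{
	return 31 - __builtin_clz(blocksize);	/* ilog2() for a nonzero power of two */
}

int main(void)
{
	assert(block_size_bits(512)  == 9);
	assert(block_size_bits(1024) == 10);
	assert(block_size_bits(2048) == 11);
	assert(block_size_bits(4096) == 12);
	return 0;
}
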
@@ -1589,19 +1623,13 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
 	sector_t block;
 	sector_t last_block;
 	struct buffer_head *bh, *head;
-	const unsigned blocksize = 1 << inode->i_blkbits;
+	unsigned int blocksize, bbits;
 	int nr_underway = 0;
 	int write_op = (wbc->sync_mode == WB_SYNC_ALL ?
 			WRITE_SYNC : WRITE);
 
-	BUG_ON(!PageLocked(page));
-
-	last_block = (i_size_read(inode) - 1) >> inode->i_blkbits;
-
-	if (!page_has_buffers(page)) {
-		create_empty_buffers(page, blocksize,
+	head = create_page_buffers(page, inode,
 					(1 << BH_Dirty)|(1 << BH_Uptodate));
-	}
 
 	/*
 	 * Be very careful. We have no exclusion from __set_page_dirty_buffers
@@ -1613,9 +1641,12 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
 	 * handle that here by just cleaning them.
 	 */
 
-	block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
-	head = page_buffers(page);
 	bh = head;
+	blocksize = bh->b_size;
+	bbits = block_size_bits(blocksize);
+
+	block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
+	last_block = (i_size_read(inode) - 1) >> bbits;
 
 	/*
 	 * Get all the dirty buffers mapped to disk addresses and
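
The hunk above recomputes block and last_block from a bbits value taken from the buffer size rather than inode->i_blkbits. A minimal userspace sketch of the page-index-to-block arithmetic follows; the constants (12 for PAGE_CACHE_SHIFT, 10 for bbits, page index 5) are made-up example values, not kernel code.

/* Hypothetical userspace sketch: with 4K pages (shift 12) and 1K blocks
 * (bbits 10), each page covers 1 << (12 - 10) == 4 blocks, so page 5
 * starts at block 20. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	unsigned int page_shift = 12;	/* PAGE_CACHE_SHIFT on a 4K-page system */
	unsigned int bbits = 10;	/* 1024-byte blocks */
	uint64_t page_index = 5;

	uint64_t block = page_index << (page_shift - bbits);
	printf("first block of page %llu is %llu\n",
	       (unsigned long long)page_index, (unsigned long long)block);	/* 20 */
	return 0;
}
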
@@ -1806,12 +1837,10 @@ int __block_write_begin(struct page *page, loff_t pos, unsigned len,
 	BUG_ON(to > PAGE_CACHE_SIZE);
 	BUG_ON(from > to);
 
-	blocksize = 1 << inode->i_blkbits;
-	if (!page_has_buffers(page))
-		create_empty_buffers(page, blocksize, 0);
-	head = page_buffers(page);
+	head = create_page_buffers(page, inode, 0);
+	blocksize = head->b_size;
+	bbits = block_size_bits(blocksize);
 
-	bbits = inode->i_blkbits;
 	block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
 
 	for(bh = head, block_start = 0; bh != head || !block_start;
@@ -1881,11 +1910,11 @@ static int __block_commit_write(struct inode *inode, struct page *page,
 	unsigned blocksize;
 	struct buffer_head *bh, *head;
 
-	blocksize = 1 << inode->i_blkbits;
+	bh = head = page_buffers(page);
+	blocksize = bh->b_size;
 
-	for(bh = head = page_buffers(page), block_start = 0;
-	    bh != head || !block_start;
-	    block_start=block_end, bh = bh->b_this_page) {
+	block_start = 0;
+	do {
 		block_end = block_start + blocksize;
 		if (block_end <= from || block_start >= to) {
 			if (!buffer_uptodate(bh))
@@ -1895,7 +1924,10 @@ static int __block_commit_write(struct inode *inode, struct page *page,
 			mark_buffer_dirty(bh);
 		}
 		clear_buffer_new(bh);
-	}
+
+		block_start = block_end;
+		bh = bh->b_this_page;
+	} while (bh != head);
 
 	/*
 	 * If this is a partial write which happened to make all buffers
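
The hunk above turns the open-coded for loop in __block_commit_write() into a do/while over the page's buffers. A tiny userspace sketch of walking such a circular singly linked list follows; struct bh here is a hypothetical stand-in for struct buffer_head, not kernel code.

/* Hypothetical userspace sketch: page buffers form a circular singly
 * linked list, so starting at "head" and stopping when we come back
 * around visits every buffer exactly once. */
#include <stdio.h>

struct bh {
	int idx;
	struct bh *b_this_page;
};

int main(void)
{
	struct bh a = { 0, 0 }, b = { 1, 0 }, c = { 2, 0 };
	struct bh *head = &a, *cur;

	a.b_this_page = &b;
	b.b_this_page = &c;
	c.b_this_page = &a;	/* last buffer points back at the head */

	cur = head;
	do {
		printf("visit bh %d\n", cur->idx);
		cur = cur->b_this_page;
	} while (cur != head);

	return 0;
}
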
@@ -2020,7 +2052,6 @@ EXPORT_SYMBOL(generic_write_end);
 int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc,
 					unsigned long from)
 {
-	struct inode *inode = page->mapping->host;
 	unsigned block_start, block_end, blocksize;
 	unsigned to;
 	struct buffer_head *bh, *head;
@@ -2029,13 +2060,13 @@ int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc,
 	if (!page_has_buffers(page))
 		return 0;
 
-	blocksize = 1 << inode->i_blkbits;
+	head = page_buffers(page);
+	blocksize = head->b_size;
 	to = min_t(unsigned, PAGE_CACHE_SIZE - from, desc->count);
 	to = from + to;
 	if (from < blocksize && to > PAGE_CACHE_SIZE - blocksize)
 		return 0;
 
-	head = page_buffers(page);
 	bh = head;
 	block_start = 0;
 	do {
@@ -2068,18 +2099,16 @@ int block_read_full_page(struct page *page, get_block_t *get_block)
 	struct inode *inode = page->mapping->host;
 	sector_t iblock, lblock;
 	struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
-	unsigned int blocksize;
+	unsigned int blocksize, bbits;
 	int nr, i;
 	int fully_mapped = 1;
 
-	BUG_ON(!PageLocked(page));
-	blocksize = 1 << inode->i_blkbits;
-	if (!page_has_buffers(page))
-		create_empty_buffers(page, blocksize, 0);
-	head = page_buffers(page);
+	head = create_page_buffers(page, inode, 0);
+	blocksize = head->b_size;
+	bbits = block_size_bits(blocksize);
 
-	iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
-	lblock = (i_size_read(inode)+blocksize-1) >> inode->i_blkbits;
+	iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
+	lblock = (i_size_read(inode)+blocksize-1) >> bbits;
 	bh = head;
 	nr = 0;
 	i = 0;
@@ -2864,6 +2893,55 @@ static void end_bio_bh_io_sync(struct bio *bio, int err)
 	bio_put(bio);
 }
 
+/*
+ * This allows us to do IO even on the odd last sectors
+ * of a device, even if the bh block size is some multiple
+ * of the physical sector size.
+ *
+ * We'll just truncate the bio to the size of the device,
+ * and clear the end of the buffer head manually.
+ *
+ * Truly out-of-range accesses will turn into actual IO
+ * errors, this only handles the "we need to be able to
+ * do IO at the final sector" case.
+ */
+static void guard_bh_eod(int rw, struct bio *bio, struct buffer_head *bh)
+{
+	sector_t maxsector;
+	unsigned bytes;
+
+	maxsector = i_size_read(bio->bi_bdev->bd_inode) >> 9;
+	if (!maxsector)
+		return;
+
+	/*
+	 * If the *whole* IO is past the end of the device,
+	 * let it through, and the IO layer will turn it into
+	 * an EIO.
+	 */
+	if (unlikely(bio->bi_sector >= maxsector))
+		return;
+
+	maxsector -= bio->bi_sector;
+	bytes = bio->bi_size;
+	if (likely((bytes >> 9) <= maxsector))
+		return;
+
+	/* Uhhuh. We've got a bh that straddles the device size! */
+	bytes = maxsector << 9;
+
+	/* Truncate the bio.. */
+	bio->bi_size = bytes;
+	bio->bi_io_vec[0].bv_len = bytes;
+
+	/* ..and clear the end of the buffer for reads */
+	if ((rw & RW_MASK) == READ) {
+		void *kaddr = kmap_atomic(bh->b_page);
+		memset(kaddr + bh_offset(bh) + bytes, 0, bh->b_size - bytes);
+		kunmap_atomic(kaddr);
+	}
+}
+
 int submit_bh(int rw, struct buffer_head * bh)
 {
 	struct bio *bio;
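
To illustrate the clipping that guard_bh_eod() performs above, a minimal userspace sketch of the sector arithmetic follows; the device geometry (4100 sectors) and the variable names mirror the bio fields but are made-up example values, not kernel code.

/* Hypothetical userspace sketch: a device of 4100 512-byte sectors with a
 * 4096-byte buffer head whose bio starts at sector 4096 only has 4 sectors
 * left, so the bio is truncated to 2048 bytes (and, for a READ, the last
 * 2048 bytes of the buffer would be cleared). */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t maxsector = 4100;	/* i_size_read(bd_inode) >> 9 */
	uint64_t bi_sector = 4096;	/* where the bio starts */
	unsigned int bi_size = 4096;	/* one 4K buffer head */

	if (bi_sector < maxsector && (bi_size >> 9) > maxsector - bi_sector) {
		unsigned int bytes = (maxsector - bi_sector) << 9;
		printf("truncate bio from %u to %u bytes\n", bi_size, bytes);
	}
	return 0;
}
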
@@ -2900,6 +2978,9 @@ int submit_bh(int rw, struct buffer_head * bh)
 	bio->bi_end_io = end_bio_bh_io_sync;
 	bio->bi_private = bh;
 
+	/* Take care of bh's that straddle the end of the device */
+	guard_bh_eod(rw, bio, bh);
+
 	bio_get(bio);
 	submit_bio(rw, bio);
 