Diffstat (limited to 'fs/buffer.c')
-rw-r--r--  fs/buffer.c | 164
1 file changed, 121 insertions(+), 43 deletions(-)
diff --git a/fs/buffer.c b/fs/buffer.c
index b5f044283edb..7a75c3e0fd58 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -46,8 +46,7 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
 
 #define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)
 
-inline void
-init_buffer(struct buffer_head *bh, bh_end_io_t *handler, void *private)
+void init_buffer(struct buffer_head *bh, bh_end_io_t *handler, void *private)
 {
         bh->b_end_io = handler;
         bh->b_private = private;
@@ -555,7 +554,7 @@ void emergency_thaw_all(void)
  */
 int sync_mapping_buffers(struct address_space *mapping)
 {
-        struct address_space *buffer_mapping = mapping->assoc_mapping;
+        struct address_space *buffer_mapping = mapping->private_data;
 
         if (buffer_mapping == NULL || list_empty(&mapping->private_list))
                 return 0;
@@ -588,10 +587,10 @@ void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
         struct address_space *buffer_mapping = bh->b_page->mapping;
 
         mark_buffer_dirty(bh);
-        if (!mapping->assoc_mapping) {
-                mapping->assoc_mapping = buffer_mapping;
+        if (!mapping->private_data) {
+                mapping->private_data = buffer_mapping;
         } else {
-                BUG_ON(mapping->assoc_mapping != buffer_mapping);
+                BUG_ON(mapping->private_data != buffer_mapping);
         }
         if (!bh->b_assoc_map) {
                 spin_lock(&buffer_mapping->private_lock);
@@ -788,7 +787,7 @@ void invalidate_inode_buffers(struct inode *inode)
         if (inode_has_buffers(inode)) {
                 struct address_space *mapping = &inode->i_data;
                 struct list_head *list = &mapping->private_list;
-                struct address_space *buffer_mapping = mapping->assoc_mapping;
+                struct address_space *buffer_mapping = mapping->private_data;
 
                 spin_lock(&buffer_mapping->private_lock);
                 while (!list_empty(list))
@@ -811,7 +810,7 @@ int remove_inode_buffers(struct inode *inode)
         if (inode_has_buffers(inode)) {
                 struct address_space *mapping = &inode->i_data;
                 struct list_head *list = &mapping->private_list;
-                struct address_space *buffer_mapping = mapping->assoc_mapping;
+                struct address_space *buffer_mapping = mapping->private_data;
 
                 spin_lock(&buffer_mapping->private_lock);
                 while (!list_empty(list)) {
@@ -850,13 +849,10 @@ try_again:
                 if (!bh)
                         goto no_grow;
 
-                bh->b_bdev = NULL;
                 bh->b_this_page = head;
                 bh->b_blocknr = -1;
                 head = bh;
 
-                bh->b_state = 0;
-                atomic_set(&bh->b_count, 0);
                 bh->b_size = size;
 
                 /* Link the buffer to its page */
@@ -911,6 +907,18 @@ link_dev_buffers(struct page *page, struct buffer_head *head)
         attach_page_buffers(page, head);
 }
 
+static sector_t blkdev_max_block(struct block_device *bdev, unsigned int size)
+{
+        sector_t retval = ~((sector_t)0);
+        loff_t sz = i_size_read(bdev->bd_inode);
+
+        if (sz) {
+                unsigned int sizebits = blksize_bits(size);
+                retval = (sz >> sizebits);
+        }
+        return retval;
+}
+
 /*
  * Initialise the state of a blockdev page's buffers.
  */
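
As an illustration (not part of the patch): blkdev_max_block() simply divides the device size by the block size, with an all-ones sentinel when the size is unknown. A user-space sketch of the same arithmetic, where blksize_bits_demo() stands in for the kernel's blksize_bits() and the 4 GiB device size is an assumed example:

    #include <stdint.h>
    #include <stdio.h>

    /* stand-in for the kernel's blksize_bits(): log2 of a power-of-two size */
    static unsigned int blksize_bits_demo(unsigned int size)
    {
            unsigned int bits = 0;

            while ((1u << bits) < size)
                    bits++;
            return bits;
    }

    int main(void)
    {
            uint64_t sz = 4ULL * 1024 * 1024 * 1024 + 512;  /* 4 GiB device plus one stray sector */
            unsigned int size = 4096;                       /* bh block size */
            uint64_t end_block = ~(uint64_t)0;

            if (sz)
                    end_block = sz >> blksize_bits_demo(size);
            /* 4 GiB holds 1048576 4 KiB blocks; the trailing 512 bytes are not addressable */
            printf("end_block = %llu\n", (unsigned long long)end_block);
            return 0;
    }
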
@@ -921,7 +929,7 @@ init_page_buffers(struct page *page, struct block_device *bdev,
         struct buffer_head *head = page_buffers(page);
         struct buffer_head *bh = head;
         int uptodate = PageUptodate(page);
-        sector_t end_block = blkdev_max_block(I_BDEV(bdev->bd_inode));
+        sector_t end_block = blkdev_max_block(I_BDEV(bdev->bd_inode), size);
 
         do {
                 if (!buffer_mapped(bh)) {
@@ -1553,6 +1561,28 @@ void unmap_underlying_metadata(struct block_device *bdev, sector_t block)
 EXPORT_SYMBOL(unmap_underlying_metadata);
 
 /*
+ * Size is a power-of-two in the range 512..PAGE_SIZE,
+ * and the case we care about most is PAGE_SIZE.
+ *
+ * So this *could* possibly be written with those
+ * constraints in mind (relevant mostly if some
+ * architecture has a slow bit-scan instruction)
+ */
+static inline int block_size_bits(unsigned int blocksize)
+{
+        return ilog2(blocksize);
+}
+
+static struct buffer_head *create_page_buffers(struct page *page, struct inode *inode, unsigned int b_state)
+{
+        BUG_ON(!PageLocked(page));
+
+        if (!page_has_buffers(page))
+                create_empty_buffers(page, 1 << ACCESS_ONCE(inode->i_blkbits), b_state);
+        return page_buffers(page);
+}
+
+/*
  * NOTE! All mapped/uptodate combinations are valid:
  *
  * Mapped   Uptodate   Meaning
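
About block_size_bits() above (illustration only, not part of the patch): because the block size is always a power of two between 512 and PAGE_SIZE, ilog2() recovers the shift directly, and PAGE_SIZE >> shift gives the number of buffers per page. A user-space sketch, with ilog2_demo() standing in for the kernel's ilog2() and PAGE_SIZE assumed to be 4096:

    #include <assert.h>
    #include <stdio.h>

    /* rough user-space equivalent of ilog2() for the power-of-two sizes used here */
    static int ilog2_demo(unsigned int v)
    {
            return 31 - __builtin_clz(v);
    }

    int main(void)
    {
            const unsigned int page_size = 4096;    /* assumed PAGE_SIZE */
            const unsigned int sizes[] = { 512, 1024, 2048, 4096 };

            for (int i = 0; i < 4; i++) {
                    int bits = ilog2_demo(sizes[i]);

                    /* e.g. 512 -> 9, 4096 -> 12; page_size >> bits buffers fit on one page */
                    printf("blocksize %u -> bits %d -> %u buffers per page\n",
                           sizes[i], bits, page_size >> bits);
            }
            assert(ilog2_demo(4096) == 12);
            return 0;
    }
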
@@ -1589,19 +1619,13 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
         sector_t block;
         sector_t last_block;
         struct buffer_head *bh, *head;
-        const unsigned blocksize = 1 << inode->i_blkbits;
+        unsigned int blocksize, bbits;
         int nr_underway = 0;
         int write_op = (wbc->sync_mode == WB_SYNC_ALL ?
                         WRITE_SYNC : WRITE);
 
-        BUG_ON(!PageLocked(page));
-
-        last_block = (i_size_read(inode) - 1) >> inode->i_blkbits;
-
-        if (!page_has_buffers(page)) {
-                create_empty_buffers(page, blocksize,
+        head = create_page_buffers(page, inode,
                                         (1 << BH_Dirty)|(1 << BH_Uptodate));
-        }
 
         /*
          * Be very careful. We have no exclusion from __set_page_dirty_buffers
@@ -1613,9 +1637,12 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
          * handle that here by just cleaning them.
          */
 
-        block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
-        head = page_buffers(page);
         bh = head;
+        blocksize = bh->b_size;
+        bbits = block_size_bits(blocksize);
+
+        block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
+        last_block = (i_size_read(inode) - 1) >> bbits;
 
         /*
          * Get all the dirty buffers mapped to disk addresses and
@@ -1806,12 +1833,10 @@ int __block_write_begin(struct page *page, loff_t pos, unsigned len,
         BUG_ON(to > PAGE_CACHE_SIZE);
         BUG_ON(from > to);
 
-        blocksize = 1 << inode->i_blkbits;
-        if (!page_has_buffers(page))
-                create_empty_buffers(page, blocksize, 0);
-        head = page_buffers(page);
+        head = create_page_buffers(page, inode, 0);
+        blocksize = head->b_size;
+        bbits = block_size_bits(blocksize);
 
-        bbits = inode->i_blkbits;
         block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
 
         for(bh = head, block_start = 0; bh != head || !block_start;
@@ -1881,11 +1906,11 @@ static int __block_commit_write(struct inode *inode, struct page *page,
         unsigned blocksize;
         struct buffer_head *bh, *head;
 
-        blocksize = 1 << inode->i_blkbits;
+        bh = head = page_buffers(page);
+        blocksize = bh->b_size;
 
-        for(bh = head = page_buffers(page), block_start = 0;
-            bh != head || !block_start;
-            block_start=block_end, bh = bh->b_this_page) {
+        block_start = 0;
+        do {
                 block_end = block_start + blocksize;
                 if (block_end <= from || block_start >= to) {
                         if (!buffer_uptodate(bh))
@@ -1895,7 +1920,10 @@ static int __block_commit_write(struct inode *inode, struct page *page,
                         mark_buffer_dirty(bh);
                 }
                 clear_buffer_new(bh);
-        }
+
+                block_start = block_end;
+                bh = bh->b_this_page;
+        } while (bh != head);
 
         /*
          * If this is a partial write which happened to make all buffers
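
The loop conversion above leans on the fact that a page's buffers form a circular singly linked ring through b_this_page, with the last buffer pointing back at the head; the old for loop needed the "bh != head || !block_start" condition just to enter the ring, while a do/while visits each buffer once and stops when it wraps around. A minimal user-space sketch of the same traversal (demo_bh and the four 1 KiB buffers are made-up stand-ins, not the kernel structures):

    #include <stddef.h>
    #include <stdio.h>

    struct demo_bh {
            size_t b_size;
            struct demo_bh *b_this_page;    /* next buffer on the page, circular */
    };

    int main(void)
    {
            struct demo_bh b[4];
            struct demo_bh *head, *bh;
            size_t block_start = 0;

            for (int i = 0; i < 4; i++) {
                    b[i].b_size = 1024;
                    b[i].b_this_page = &b[(i + 1) % 4];     /* last one wraps to the first */
            }
            head = bh = &b[0];
            do {
                    size_t block_end = block_start + bh->b_size;

                    printf("buffer covers bytes [%zu, %zu)\n", block_start, block_end);
                    block_start = block_end;
                    bh = bh->b_this_page;
            } while (bh != head);   /* one full lap of the ring, then stop */
            return 0;
    }
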
@@ -2020,7 +2048,6 @@ EXPORT_SYMBOL(generic_write_end);
 int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc,
                                 unsigned long from)
 {
-        struct inode *inode = page->mapping->host;
         unsigned block_start, block_end, blocksize;
         unsigned to;
         struct buffer_head *bh, *head;
@@ -2029,13 +2056,13 @@ int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc,
         if (!page_has_buffers(page))
                 return 0;
 
-        blocksize = 1 << inode->i_blkbits;
+        head = page_buffers(page);
+        blocksize = head->b_size;
         to = min_t(unsigned, PAGE_CACHE_SIZE - from, desc->count);
         to = from + to;
         if (from < blocksize && to > PAGE_CACHE_SIZE - blocksize)
                 return 0;
 
-        head = page_buffers(page);
         bh = head;
         block_start = 0;
         do {
@@ -2068,18 +2095,16 @@ int block_read_full_page(struct page *page, get_block_t *get_block)
         struct inode *inode = page->mapping->host;
         sector_t iblock, lblock;
         struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
-        unsigned int blocksize;
+        unsigned int blocksize, bbits;
         int nr, i;
         int fully_mapped = 1;
 
-        BUG_ON(!PageLocked(page));
-        blocksize = 1 << inode->i_blkbits;
-        if (!page_has_buffers(page))
-                create_empty_buffers(page, blocksize, 0);
-        head = page_buffers(page);
+        head = create_page_buffers(page, inode, 0);
+        blocksize = head->b_size;
+        bbits = block_size_bits(blocksize);
 
-        iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
-        lblock = (i_size_read(inode)+blocksize-1) >> inode->i_blkbits;
+        iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
+        lblock = (i_size_read(inode)+blocksize-1) >> bbits;
         bh = head;
         nr = 0;
         i = 0;
@@ -2864,6 +2889,56 @@ static void end_bio_bh_io_sync(struct bio *bio, int err)
         bio_put(bio);
 }
 
+/*
+ * This allows us to do IO even on the odd last sectors
+ * of a device, even if the bh block size is some multiple
+ * of the physical sector size.
+ *
+ * We'll just truncate the bio to the size of the device,
+ * and clear the end of the buffer head manually.
+ *
+ * Truly out-of-range accesses will turn into actual IO
+ * errors, this only handles the "we need to be able to
+ * do IO at the final sector" case.
+ */
+static void guard_bh_eod(int rw, struct bio *bio, struct buffer_head *bh)
+{
+        sector_t maxsector;
+        unsigned bytes;
+
+        maxsector = i_size_read(bio->bi_bdev->bd_inode) >> 9;
+        if (!maxsector)
+                return;
+
+        /*
+         * If the *whole* IO is past the end of the device,
+         * let it through, and the IO layer will turn it into
+         * an EIO.
+         */
+        if (unlikely(bio->bi_sector >= maxsector))
+                return;
+
+        maxsector -= bio->bi_sector;
+        bytes = bio->bi_size;
+        if (likely((bytes >> 9) <= maxsector))
+                return;
+
+        /* Uhhuh. We've got a bh that straddles the device size! */
+        bytes = maxsector << 9;
+
+        /* Truncate the bio.. */
+        bio->bi_size = bytes;
+        bio->bi_io_vec[0].bv_len = bytes;
+
+        /* ..and clear the end of the buffer for reads */
+        if ((rw & RW_MASK) == READ) {
+                void *kaddr = kmap_atomic(bh->b_page);
+                memset(kaddr + bh_offset(bh) + bytes, 0, bh->b_size - bytes);
+                kunmap_atomic(kaddr);
+                flush_dcache_page(bh->b_page);
+        }
+}
+
 int submit_bh(int rw, struct buffer_head * bh)
 {
         struct bio *bio;
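
To make the guard_bh_eod() arithmetic concrete (illustration only, with made-up numbers): on a hypothetical 7-sector device, a 4 KiB buffer whose bio starts at sector 5 has only 2 sectors left before the end, so the bio is truncated to 1024 bytes and a read would then zero the remaining 3072 bytes of the buffer by hand. A user-space sketch of just that calculation:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint64_t dev_bytes = 7 * 512;   /* hypothetical device: 7 sectors long */
            uint64_t maxsector = dev_bytes >> 9;
            uint64_t bi_sector = 5;         /* bio starts at sector 5 */
            unsigned int bytes = 4096;      /* bh/bio size: 8 sectors, straddles the end */

            if (bi_sector >= maxsector) {
                    puts("whole IO past the end: left alone, the block layer returns -EIO");
                    return 0;
            }
            maxsector -= bi_sector;         /* sectors remaining on the device: 2 */
            if ((bytes >> 9) > maxsector) {
                    unsigned int new_bytes = (unsigned int)(maxsector << 9);

                    printf("truncate bio from %u to %u bytes; a read clears the last %u bytes of the bh\n",
                           bytes, new_bytes, bytes - new_bytes);
            }
            return 0;
    }
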
@@ -2900,6 +2975,9 @@ int submit_bh(int rw, struct buffer_head * bh)
         bio->bi_end_io = end_bio_bh_io_sync;
         bio->bi_private = bh;
 
+        /* Take care of bh's that straddle the end of the device */
+        guard_bh_eod(rw, bio, bh);
+
         bio_get(bio);
         submit_bio(rw, bio);
 