diff options
Diffstat (limited to 'fs/buffer.c')
-rw-r--r-- | fs/buffer.c | 164 |
1 files changed, 121 insertions, 43 deletions
diff --git a/fs/buffer.c b/fs/buffer.c index b5f044283edb..7a75c3e0fd58 100644 --- a/fs/buffer.c +++ b/fs/buffer.c | |||
@@ -46,8 +46,7 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list); | |||
46 | 46 | ||
47 | #define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers) | 47 | #define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers) |
48 | 48 | ||
49 | inline void | 49 | void init_buffer(struct buffer_head *bh, bh_end_io_t *handler, void *private) |
50 | init_buffer(struct buffer_head *bh, bh_end_io_t *handler, void *private) | ||
51 | { | 50 | { |
52 | bh->b_end_io = handler; | 51 | bh->b_end_io = handler; |
53 | bh->b_private = private; | 52 | bh->b_private = private; |
@@ -555,7 +554,7 @@ void emergency_thaw_all(void) | |||
555 | */ | 554 | */ |
556 | int sync_mapping_buffers(struct address_space *mapping) | 555 | int sync_mapping_buffers(struct address_space *mapping) |
557 | { | 556 | { |
558 | struct address_space *buffer_mapping = mapping->assoc_mapping; | 557 | struct address_space *buffer_mapping = mapping->private_data; |
559 | 558 | ||
560 | if (buffer_mapping == NULL || list_empty(&mapping->private_list)) | 559 | if (buffer_mapping == NULL || list_empty(&mapping->private_list)) |
561 | return 0; | 560 | return 0; |
@@ -588,10 +587,10 @@ void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode) | |||
588 | struct address_space *buffer_mapping = bh->b_page->mapping; | 587 | struct address_space *buffer_mapping = bh->b_page->mapping; |
589 | 588 | ||
590 | mark_buffer_dirty(bh); | 589 | mark_buffer_dirty(bh); |
591 | if (!mapping->assoc_mapping) { | 590 | if (!mapping->private_data) { |
592 | mapping->assoc_mapping = buffer_mapping; | 591 | mapping->private_data = buffer_mapping; |
593 | } else { | 592 | } else { |
594 | BUG_ON(mapping->assoc_mapping != buffer_mapping); | 593 | BUG_ON(mapping->private_data != buffer_mapping); |
595 | } | 594 | } |
596 | if (!bh->b_assoc_map) { | 595 | if (!bh->b_assoc_map) { |
597 | spin_lock(&buffer_mapping->private_lock); | 596 | spin_lock(&buffer_mapping->private_lock); |
@@ -788,7 +787,7 @@ void invalidate_inode_buffers(struct inode *inode) | |||
788 | if (inode_has_buffers(inode)) { | 787 | if (inode_has_buffers(inode)) { |
789 | struct address_space *mapping = &inode->i_data; | 788 | struct address_space *mapping = &inode->i_data; |
790 | struct list_head *list = &mapping->private_list; | 789 | struct list_head *list = &mapping->private_list; |
791 | struct address_space *buffer_mapping = mapping->assoc_mapping; | 790 | struct address_space *buffer_mapping = mapping->private_data; |
792 | 791 | ||
793 | spin_lock(&buffer_mapping->private_lock); | 792 | spin_lock(&buffer_mapping->private_lock); |
794 | while (!list_empty(list)) | 793 | while (!list_empty(list)) |
@@ -811,7 +810,7 @@ int remove_inode_buffers(struct inode *inode) | |||
811 | if (inode_has_buffers(inode)) { | 810 | if (inode_has_buffers(inode)) { |
812 | struct address_space *mapping = &inode->i_data; | 811 | struct address_space *mapping = &inode->i_data; |
813 | struct list_head *list = &mapping->private_list; | 812 | struct list_head *list = &mapping->private_list; |
814 | struct address_space *buffer_mapping = mapping->assoc_mapping; | 813 | struct address_space *buffer_mapping = mapping->private_data; |
815 | 814 | ||
816 | spin_lock(&buffer_mapping->private_lock); | 815 | spin_lock(&buffer_mapping->private_lock); |
817 | while (!list_empty(list)) { | 816 | while (!list_empty(list)) { |
@@ -850,13 +849,10 @@ try_again: | |||
850 | if (!bh) | 849 | if (!bh) |
851 | goto no_grow; | 850 | goto no_grow; |
852 | 851 | ||
853 | bh->b_bdev = NULL; | ||
854 | bh->b_this_page = head; | 852 | bh->b_this_page = head; |
855 | bh->b_blocknr = -1; | 853 | bh->b_blocknr = -1; |
856 | head = bh; | 854 | head = bh; |
857 | 855 | ||
858 | bh->b_state = 0; | ||
859 | atomic_set(&bh->b_count, 0); | ||
860 | bh->b_size = size; | 856 | bh->b_size = size; |
861 | 857 | ||
862 | /* Link the buffer to its page */ | 858 | /* Link the buffer to its page */ |
@@ -911,6 +907,18 @@ link_dev_buffers(struct page *page, struct buffer_head *head) | |||
911 | attach_page_buffers(page, head); | 907 | attach_page_buffers(page, head); |
912 | } | 908 | } |
913 | 909 | ||
910 | static sector_t blkdev_max_block(struct block_device *bdev, unsigned int size) | ||
911 | { | ||
912 | sector_t retval = ~((sector_t)0); | ||
913 | loff_t sz = i_size_read(bdev->bd_inode); | ||
914 | |||
915 | if (sz) { | ||
916 | unsigned int sizebits = blksize_bits(size); | ||
917 | retval = (sz >> sizebits); | ||
918 | } | ||
919 | return retval; | ||
920 | } | ||
921 | |||
914 | /* | 922 | /* |
915 | * Initialise the state of a blockdev page's buffers. | 923 | * Initialise the state of a blockdev page's buffers. |
916 | */ | 924 | */ |
@@ -921,7 +929,7 @@ init_page_buffers(struct page *page, struct block_device *bdev, | |||
921 | struct buffer_head *head = page_buffers(page); | 929 | struct buffer_head *head = page_buffers(page); |
922 | struct buffer_head *bh = head; | 930 | struct buffer_head *bh = head; |
923 | int uptodate = PageUptodate(page); | 931 | int uptodate = PageUptodate(page); |
924 | sector_t end_block = blkdev_max_block(I_BDEV(bdev->bd_inode)); | 932 | sector_t end_block = blkdev_max_block(I_BDEV(bdev->bd_inode), size); |
925 | 933 | ||
926 | do { | 934 | do { |
927 | if (!buffer_mapped(bh)) { | 935 | if (!buffer_mapped(bh)) { |
@@ -1553,6 +1561,28 @@ void unmap_underlying_metadata(struct block_device *bdev, sector_t block) | |||
1553 | EXPORT_SYMBOL(unmap_underlying_metadata); | 1561 | EXPORT_SYMBOL(unmap_underlying_metadata); |
1554 | 1562 | ||
1555 | /* | 1563 | /* |
1564 | * Size is a power-of-two in the range 512..PAGE_SIZE, | ||
1565 | * and the case we care about most is PAGE_SIZE. | ||
1566 | * | ||
1567 | * So this *could* possibly be written with those | ||
1568 | * constraints in mind (relevant mostly if some | ||
1569 | * architecture has a slow bit-scan instruction) | ||
1570 | */ | ||
1571 | static inline int block_size_bits(unsigned int blocksize) | ||
1572 | { | ||
1573 | return ilog2(blocksize); | ||
1574 | } | ||
1575 | |||
1576 | static struct buffer_head *create_page_buffers(struct page *page, struct inode *inode, unsigned int b_state) | ||
1577 | { | ||
1578 | BUG_ON(!PageLocked(page)); | ||
1579 | |||
1580 | if (!page_has_buffers(page)) | ||
1581 | create_empty_buffers(page, 1 << ACCESS_ONCE(inode->i_blkbits), b_state); | ||
1582 | return page_buffers(page); | ||
1583 | } | ||
1584 | |||
1585 | /* | ||
1556 | * NOTE! All mapped/uptodate combinations are valid: | 1586 | * NOTE! All mapped/uptodate combinations are valid: |
1557 | * | 1587 | * |
1558 | * Mapped Uptodate Meaning | 1588 | * Mapped Uptodate Meaning |
@@ -1589,19 +1619,13 @@ static int __block_write_full_page(struct inode *inode, struct page *page, | |||
1589 | sector_t block; | 1619 | sector_t block; |
1590 | sector_t last_block; | 1620 | sector_t last_block; |
1591 | struct buffer_head *bh, *head; | 1621 | struct buffer_head *bh, *head; |
1592 | const unsigned blocksize = 1 << inode->i_blkbits; | 1622 | unsigned int blocksize, bbits; |
1593 | int nr_underway = 0; | 1623 | int nr_underway = 0; |
1594 | int write_op = (wbc->sync_mode == WB_SYNC_ALL ? | 1624 | int write_op = (wbc->sync_mode == WB_SYNC_ALL ? |
1595 | WRITE_SYNC : WRITE); | 1625 | WRITE_SYNC : WRITE); |
1596 | 1626 | ||
1597 | BUG_ON(!PageLocked(page)); | 1627 | head = create_page_buffers(page, inode, |
1598 | |||
1599 | last_block = (i_size_read(inode) - 1) >> inode->i_blkbits; | ||
1600 | |||
1601 | if (!page_has_buffers(page)) { | ||
1602 | create_empty_buffers(page, blocksize, | ||
1603 | (1 << BH_Dirty)|(1 << BH_Uptodate)); | 1628 | (1 << BH_Dirty)|(1 << BH_Uptodate)); |
1604 | } | ||
1605 | 1629 | ||
1606 | /* | 1630 | /* |
1607 | * Be very careful. We have no exclusion from __set_page_dirty_buffers | 1631 | * Be very careful. We have no exclusion from __set_page_dirty_buffers |
@@ -1613,9 +1637,12 @@ static int __block_write_full_page(struct inode *inode, struct page *page, | |||
1613 | * handle that here by just cleaning them. | 1637 | * handle that here by just cleaning them. |
1614 | */ | 1638 | */ |
1615 | 1639 | ||
1616 | block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
1617 | head = page_buffers(page); | ||
1618 | bh = head; | 1640 | bh = head; |
1641 | blocksize = bh->b_size; | ||
1642 | bbits = block_size_bits(blocksize); | ||
1643 | |||
1644 | block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits); | ||
1645 | last_block = (i_size_read(inode) - 1) >> bbits; | ||
1619 | 1646 | ||
1620 | /* | 1647 | /* |
1621 | * Get all the dirty buffers mapped to disk addresses and | 1648 | * Get all the dirty buffers mapped to disk addresses and |
@@ -1806,12 +1833,10 @@ int __block_write_begin(struct page *page, loff_t pos, unsigned len, | |||
1806 | BUG_ON(to > PAGE_CACHE_SIZE); | 1833 | BUG_ON(to > PAGE_CACHE_SIZE); |
1807 | BUG_ON(from > to); | 1834 | BUG_ON(from > to); |
1808 | 1835 | ||
1809 | blocksize = 1 << inode->i_blkbits; | 1836 | head = create_page_buffers(page, inode, 0); |
1810 | if (!page_has_buffers(page)) | 1837 | blocksize = head->b_size; |
1811 | create_empty_buffers(page, blocksize, 0); | 1838 | bbits = block_size_bits(blocksize); |
1812 | head = page_buffers(page); | ||
1813 | 1839 | ||
1814 | bbits = inode->i_blkbits; | ||
1815 | block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits); | 1840 | block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits); |
1816 | 1841 | ||
1817 | for(bh = head, block_start = 0; bh != head || !block_start; | 1842 | for(bh = head, block_start = 0; bh != head || !block_start; |
@@ -1881,11 +1906,11 @@ static int __block_commit_write(struct inode *inode, struct page *page, | |||
1881 | unsigned blocksize; | 1906 | unsigned blocksize; |
1882 | struct buffer_head *bh, *head; | 1907 | struct buffer_head *bh, *head; |
1883 | 1908 | ||
1884 | blocksize = 1 << inode->i_blkbits; | 1909 | bh = head = page_buffers(page); |
1910 | blocksize = bh->b_size; | ||
1885 | 1911 | ||
1886 | for(bh = head = page_buffers(page), block_start = 0; | 1912 | block_start = 0; |
1887 | bh != head || !block_start; | 1913 | do { |
1888 | block_start=block_end, bh = bh->b_this_page) { | ||
1889 | block_end = block_start + blocksize; | 1914 | block_end = block_start + blocksize; |
1890 | if (block_end <= from || block_start >= to) { | 1915 | if (block_end <= from || block_start >= to) { |
1891 | if (!buffer_uptodate(bh)) | 1916 | if (!buffer_uptodate(bh)) |
@@ -1895,7 +1920,10 @@ static int __block_commit_write(struct inode *inode, struct page *page, | |||
1895 | mark_buffer_dirty(bh); | 1920 | mark_buffer_dirty(bh); |
1896 | } | 1921 | } |
1897 | clear_buffer_new(bh); | 1922 | clear_buffer_new(bh); |
1898 | } | 1923 | |
1924 | block_start = block_end; | ||
1925 | bh = bh->b_this_page; | ||
1926 | } while (bh != head); | ||
1899 | 1927 | ||
1900 | /* | 1928 | /* |
1901 | * If this is a partial write which happened to make all buffers | 1929 | * If this is a partial write which happened to make all buffers |
@@ -2020,7 +2048,6 @@ EXPORT_SYMBOL(generic_write_end); | |||
2020 | int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc, | 2048 | int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc, |
2021 | unsigned long from) | 2049 | unsigned long from) |
2022 | { | 2050 | { |
2023 | struct inode *inode = page->mapping->host; | ||
2024 | unsigned block_start, block_end, blocksize; | 2051 | unsigned block_start, block_end, blocksize; |
2025 | unsigned to; | 2052 | unsigned to; |
2026 | struct buffer_head *bh, *head; | 2053 | struct buffer_head *bh, *head; |
@@ -2029,13 +2056,13 @@ int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc, | |||
2029 | if (!page_has_buffers(page)) | 2056 | if (!page_has_buffers(page)) |
2030 | return 0; | 2057 | return 0; |
2031 | 2058 | ||
2032 | blocksize = 1 << inode->i_blkbits; | 2059 | head = page_buffers(page); |
2060 | blocksize = head->b_size; | ||
2033 | to = min_t(unsigned, PAGE_CACHE_SIZE - from, desc->count); | 2061 | to = min_t(unsigned, PAGE_CACHE_SIZE - from, desc->count); |
2034 | to = from + to; | 2062 | to = from + to; |
2035 | if (from < blocksize && to > PAGE_CACHE_SIZE - blocksize) | 2063 | if (from < blocksize && to > PAGE_CACHE_SIZE - blocksize) |
2036 | return 0; | 2064 | return 0; |
2037 | 2065 | ||
2038 | head = page_buffers(page); | ||
2039 | bh = head; | 2066 | bh = head; |
2040 | block_start = 0; | 2067 | block_start = 0; |
2041 | do { | 2068 | do { |
@@ -2068,18 +2095,16 @@ int block_read_full_page(struct page *page, get_block_t *get_block) | |||
2068 | struct inode *inode = page->mapping->host; | 2095 | struct inode *inode = page->mapping->host; |
2069 | sector_t iblock, lblock; | 2096 | sector_t iblock, lblock; |
2070 | struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE]; | 2097 | struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE]; |
2071 | unsigned int blocksize; | 2098 | unsigned int blocksize, bbits; |
2072 | int nr, i; | 2099 | int nr, i; |
2073 | int fully_mapped = 1; | 2100 | int fully_mapped = 1; |
2074 | 2101 | ||
2075 | BUG_ON(!PageLocked(page)); | 2102 | head = create_page_buffers(page, inode, 0); |
2076 | blocksize = 1 << inode->i_blkbits; | 2103 | blocksize = head->b_size; |
2077 | if (!page_has_buffers(page)) | 2104 | bbits = block_size_bits(blocksize); |
2078 | create_empty_buffers(page, blocksize, 0); | ||
2079 | head = page_buffers(page); | ||
2080 | 2105 | ||
2081 | iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); | 2106 | iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits); |
2082 | lblock = (i_size_read(inode)+blocksize-1) >> inode->i_blkbits; | 2107 | lblock = (i_size_read(inode)+blocksize-1) >> bbits; |
2083 | bh = head; | 2108 | bh = head; |
2084 | nr = 0; | 2109 | nr = 0; |
2085 | i = 0; | 2110 | i = 0; |
@@ -2864,6 +2889,56 @@ static void end_bio_bh_io_sync(struct bio *bio, int err) | |||
2864 | bio_put(bio); | 2889 | bio_put(bio); |
2865 | } | 2890 | } |
2866 | 2891 | ||
2892 | /* | ||
2893 | * This allows us to do IO even on the odd last sectors | ||
2894 | * of a device, even if the bh block size is some multiple | ||
2895 | * of the physical sector size. | ||
2896 | * | ||
2897 | * We'll just truncate the bio to the size of the device, | ||
2898 | * and clear the end of the buffer head manually. | ||
2899 | * | ||
2900 | * Truly out-of-range accesses will turn into actual IO | ||
2901 | * errors, this only handles the "we need to be able to | ||
2902 | * do IO at the final sector" case. | ||
2903 | */ | ||
2904 | static void guard_bh_eod(int rw, struct bio *bio, struct buffer_head *bh) | ||
2905 | { | ||
2906 | sector_t maxsector; | ||
2907 | unsigned bytes; | ||
2908 | |||
2909 | maxsector = i_size_read(bio->bi_bdev->bd_inode) >> 9; | ||
2910 | if (!maxsector) | ||
2911 | return; | ||
2912 | |||
2913 | /* | ||
2914 | * If the *whole* IO is past the end of the device, | ||
2915 | * let it through, and the IO layer will turn it into | ||
2916 | * an EIO. | ||
2917 | */ | ||
2918 | if (unlikely(bio->bi_sector >= maxsector)) | ||
2919 | return; | ||
2920 | |||
2921 | maxsector -= bio->bi_sector; | ||
2922 | bytes = bio->bi_size; | ||
2923 | if (likely((bytes >> 9) <= maxsector)) | ||
2924 | return; | ||
2925 | |||
2926 | /* Uhhuh. We've got a bh that straddles the device size! */ | ||
2927 | bytes = maxsector << 9; | ||
2928 | |||
2929 | /* Truncate the bio.. */ | ||
2930 | bio->bi_size = bytes; | ||
2931 | bio->bi_io_vec[0].bv_len = bytes; | ||
2932 | |||
2933 | /* ..and clear the end of the buffer for reads */ | ||
2934 | if ((rw & RW_MASK) == READ) { | ||
2935 | void *kaddr = kmap_atomic(bh->b_page); | ||
2936 | memset(kaddr + bh_offset(bh) + bytes, 0, bh->b_size - bytes); | ||
2937 | kunmap_atomic(kaddr); | ||
2938 | flush_dcache_page(bh->b_page); | ||
2939 | } | ||
2940 | } | ||
2941 | |||
2867 | int submit_bh(int rw, struct buffer_head * bh) | 2942 | int submit_bh(int rw, struct buffer_head * bh) |
2868 | { | 2943 | { |
2869 | struct bio *bio; | 2944 | struct bio *bio; |
@@ -2900,6 +2975,9 @@ int submit_bh(int rw, struct buffer_head * bh) | |||
2900 | bio->bi_end_io = end_bio_bh_io_sync; | 2975 | bio->bi_end_io = end_bio_bh_io_sync; |
2901 | bio->bi_private = bh; | 2976 | bio->bi_private = bh; |
2902 | 2977 | ||
2978 | /* Take care of bh's that straddle the end of the device */ | ||
2979 | guard_bh_eod(rw, bio, bh); | ||
2980 | |||
2903 | bio_get(bio); | 2981 | bio_get(bio); |
2904 | submit_bio(rw, bio); | 2982 | submit_bio(rw, bio); |
2905 | 2983 | ||