Diffstat (limited to 'fs/buffer.c')

 fs/buffer.c | 163 +++++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 120 insertions(+), 43 deletions(-)
diff --git a/fs/buffer.c b/fs/buffer.c
index b5f044283edb..c017a2dfb909 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -46,8 +46,7 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
 
 #define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)
 
-inline void
-init_buffer(struct buffer_head *bh, bh_end_io_t *handler, void *private)
+void init_buffer(struct buffer_head *bh, bh_end_io_t *handler, void *private)
 {
         bh->b_end_io = handler;
         bh->b_private = private;
@@ -555,7 +554,7 @@ void emergency_thaw_all(void)
  */
 int sync_mapping_buffers(struct address_space *mapping)
 {
-        struct address_space *buffer_mapping = mapping->assoc_mapping;
+        struct address_space *buffer_mapping = mapping->private_data;
 
         if (buffer_mapping == NULL || list_empty(&mapping->private_list))
                 return 0;
@@ -588,10 +587,10 @@ void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
         struct address_space *buffer_mapping = bh->b_page->mapping;
 
         mark_buffer_dirty(bh);
-        if (!mapping->assoc_mapping) {
-                mapping->assoc_mapping = buffer_mapping;
+        if (!mapping->private_data) {
+                mapping->private_data = buffer_mapping;
         } else {
-                BUG_ON(mapping->assoc_mapping != buffer_mapping);
+                BUG_ON(mapping->private_data != buffer_mapping);
         }
         if (!bh->b_assoc_map) {
                 spin_lock(&buffer_mapping->private_lock);
@@ -788,7 +787,7 @@ void invalidate_inode_buffers(struct inode *inode)
         if (inode_has_buffers(inode)) {
                 struct address_space *mapping = &inode->i_data;
                 struct list_head *list = &mapping->private_list;
-                struct address_space *buffer_mapping = mapping->assoc_mapping;
+                struct address_space *buffer_mapping = mapping->private_data;
 
                 spin_lock(&buffer_mapping->private_lock);
                 while (!list_empty(list))
@@ -811,7 +810,7 @@ int remove_inode_buffers(struct inode *inode)
         if (inode_has_buffers(inode)) {
                 struct address_space *mapping = &inode->i_data;
                 struct list_head *list = &mapping->private_list;
-                struct address_space *buffer_mapping = mapping->assoc_mapping;
+                struct address_space *buffer_mapping = mapping->private_data;
 
                 spin_lock(&buffer_mapping->private_lock);
                 while (!list_empty(list)) {
@@ -850,13 +849,10 @@ try_again:
                 if (!bh)
                         goto no_grow;
 
-                bh->b_bdev = NULL;
                 bh->b_this_page = head;
                 bh->b_blocknr = -1;
                 head = bh;
 
-                bh->b_state = 0;
-                atomic_set(&bh->b_count, 0);
                 bh->b_size = size;
 
                 /* Link the buffer to its page */
@@ -911,6 +907,18 @@ link_dev_buffers(struct page *page, struct buffer_head *head)
         attach_page_buffers(page, head);
 }
 
+static sector_t blkdev_max_block(struct block_device *bdev, unsigned int size)
+{
+        sector_t retval = ~((sector_t)0);
+        loff_t sz = i_size_read(bdev->bd_inode);
+
+        if (sz) {
+                unsigned int sizebits = blksize_bits(size);
+                retval = (sz >> sizebits);
+        }
+        return retval;
+}
+
 /*
  * Initialise the state of a blockdev page's buffers.
  */
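Aside (illustration, not part of the patch): blkdev_max_block() now takes the block size as an argument and computes the number of whole blocks that fit on the device from the bdev inode's size. A standalone user-space sketch of the same shift arithmetic, with a made-up device size:

#include <stdint.h>
#include <stdio.h>

/* Mirrors blkdev_max_block(): blocks that fit = device bytes >> log2(blocksize). */
static uint64_t max_block(uint64_t dev_bytes, unsigned int blocksize_bits)
{
        uint64_t retval = ~(uint64_t)0;   /* "no limit" when the size is unknown (0) */

        if (dev_bytes)
                retval = dev_bytes >> blocksize_bits;
        return retval;
}

int main(void)
{
        /* Hypothetical 1000204886016-byte disk with 4096-byte blocks (bits = 12). */
        printf("%llu\n", (unsigned long long)max_block(1000204886016ULL, 12));
        return 0;   /* prints 244190646: blocks 0..244190645 lie fully on the device */
}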
@@ -921,7 +929,7 @@ init_page_buffers(struct page *page, struct block_device *bdev,
         struct buffer_head *head = page_buffers(page);
         struct buffer_head *bh = head;
         int uptodate = PageUptodate(page);
-        sector_t end_block = blkdev_max_block(I_BDEV(bdev->bd_inode));
+        sector_t end_block = blkdev_max_block(I_BDEV(bdev->bd_inode), size);
 
         do {
                 if (!buffer_mapped(bh)) {
@@ -1553,6 +1561,28 @@ void unmap_underlying_metadata(struct block_device *bdev, sector_t block)
 EXPORT_SYMBOL(unmap_underlying_metadata);
 
 /*
+ * Size is a power-of-two in the range 512..PAGE_SIZE,
+ * and the case we care about most is PAGE_SIZE.
+ *
+ * So this *could* possibly be written with those
+ * constraints in mind (relevant mostly if some
+ * architecture has a slow bit-scan instruction)
+ */
+static inline int block_size_bits(unsigned int blocksize)
+{
+        return ilog2(blocksize);
+}
+
+static struct buffer_head *create_page_buffers(struct page *page, struct inode *inode, unsigned int b_state)
+{
+        BUG_ON(!PageLocked(page));
+
+        if (!page_has_buffers(page))
+                create_empty_buffers(page, 1 << ACCESS_ONCE(inode->i_blkbits), b_state);
+        return page_buffers(page);
+}
+
+/*
  * NOTE! All mapped/uptodate combinations are valid:
  *
  *      Mapped  Uptodate        Meaning
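Aside (illustration, not part of the patch): block_size_bits() is just ilog2() on a power-of-two block size, and create_page_buffers() folds the repeated "create buffers if the page has none" sequence into one helper. A user-space sketch of the log2 step, using the GCC/Clang builtin as a stand-in for the kernel's ilog2():

#include <assert.h>
#include <stdio.h>

/* For power-of-two n, log2(n) is the index of its single set bit. */
static int block_size_bits(unsigned int blocksize)
{
        assert(blocksize && (blocksize & (blocksize - 1)) == 0);
        return __builtin_ctz(blocksize);   /* count trailing zeros == ilog2 here */
}

int main(void)
{
        printf("%d %d %d\n",
               block_size_bits(512),    /* 9 */
               block_size_bits(1024),   /* 10 */
               block_size_bits(4096));  /* 12 */
        return 0;
}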
@@ -1589,19 +1619,13 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
         sector_t block;
         sector_t last_block;
         struct buffer_head *bh, *head;
-        const unsigned blocksize = 1 << inode->i_blkbits;
+        unsigned int blocksize, bbits;
         int nr_underway = 0;
         int write_op = (wbc->sync_mode == WB_SYNC_ALL ?
                         WRITE_SYNC : WRITE);
 
-        BUG_ON(!PageLocked(page));
-
-        last_block = (i_size_read(inode) - 1) >> inode->i_blkbits;
-
-        if (!page_has_buffers(page)) {
-                create_empty_buffers(page, blocksize,
+        head = create_page_buffers(page, inode,
                                         (1 << BH_Dirty)|(1 << BH_Uptodate));
-        }
 
         /*
          * Be very careful.  We have no exclusion from __set_page_dirty_buffers
@@ -1613,9 +1637,12 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
          * handle that here by just cleaning them.
          */
 
-        block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
-        head = page_buffers(page);
         bh = head;
+        blocksize = bh->b_size;
+        bbits = block_size_bits(blocksize);
+
+        block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
+        last_block = (i_size_read(inode) - 1) >> bbits;
 
         /*
          * Get all the dirty buffers mapped to disk addresses and
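Aside (illustration, not part of the patch): with the block size now taken from the buffer head rather than the inode, the first block backed by a page is page->index << (PAGE_CACHE_SHIFT - bbits). A standalone sketch with assumed values (4 KiB pages, so PAGE_CACHE_SHIFT is 12, and 1 KiB blocks):

#include <stdint.h>
#include <stdio.h>

#define PAGE_CACHE_SHIFT 12   /* assumes 4096-byte pages */

int main(void)
{
        uint64_t page_index = 5;   /* hypothetical page within the file */
        unsigned int bbits = 10;   /* 1024-byte blocks */

        /* Each page holds 2^(12-10) = 4 blocks, so page 5 starts at block 20. */
        uint64_t block = page_index << (PAGE_CACHE_SHIFT - bbits);
        printf("%llu\n", (unsigned long long)block);   /* 20 */
        return 0;
}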
@@ -1806,12 +1833,10 @@ int __block_write_begin(struct page *page, loff_t pos, unsigned len,
         BUG_ON(to > PAGE_CACHE_SIZE);
         BUG_ON(from > to);
 
-        blocksize = 1 << inode->i_blkbits;
-        if (!page_has_buffers(page))
-                create_empty_buffers(page, blocksize, 0);
-        head = page_buffers(page);
+        head = create_page_buffers(page, inode, 0);
+        blocksize = head->b_size;
+        bbits = block_size_bits(blocksize);
 
-        bbits = inode->i_blkbits;
         block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
 
         for(bh = head, block_start = 0; bh != head || !block_start;
@@ -1881,11 +1906,11 @@ static int __block_commit_write(struct inode *inode, struct page *page,
         unsigned blocksize;
         struct buffer_head *bh, *head;
 
-        blocksize = 1 << inode->i_blkbits;
+        bh = head = page_buffers(page);
+        blocksize = bh->b_size;
 
-        for(bh = head = page_buffers(page), block_start = 0;
-            bh != head || !block_start;
-            block_start=block_end, bh = bh->b_this_page) {
+        block_start = 0;
+        do {
                 block_end = block_start + blocksize;
                 if (block_end <= from || block_start >= to) {
                         if (!buffer_uptodate(bh))
@@ -1895,7 +1920,10 @@ static int __block_commit_write(struct inode *inode, struct page *page,
                         mark_buffer_dirty(bh);
                 }
                 clear_buffer_new(bh);
-        }
+
+                block_start = block_end;
+                bh = bh->b_this_page;
+        } while (bh != head);
 
         /*
          * If this is a partial write which happened to make all buffers
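Aside (illustration, not part of the patch): the rewritten loop is the usual do/while idiom for visiting every node of a circular singly linked list (a page's buffer ring) exactly once, replacing the old for-loop with its "bh != head || !block_start" termination trick. A minimal standalone sketch of the pattern:

#include <stdio.h>

struct node {
        int val;
        struct node *next;   /* circular: the last node points back to the first */
};

int main(void)
{
        struct node c = { 3, NULL }, b = { 2, &c }, a = { 1, &b };
        struct node *head, *n;

        c.next = &a;         /* close the ring */
        n = head = &a;
        do {                 /* body runs at least once, stops when back at head */
                printf("%d\n", n->val);
                n = n->next;
        } while (n != head);
        return 0;
}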
@@ -2020,7 +2048,6 @@ EXPORT_SYMBOL(generic_write_end);
 int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc,
                                         unsigned long from)
 {
-        struct inode *inode = page->mapping->host;
         unsigned block_start, block_end, blocksize;
         unsigned to;
         struct buffer_head *bh, *head;
@@ -2029,13 +2056,13 @@ int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc,
         if (!page_has_buffers(page))
                 return 0;
 
-        blocksize = 1 << inode->i_blkbits;
+        head = page_buffers(page);
+        blocksize = head->b_size;
         to = min_t(unsigned, PAGE_CACHE_SIZE - from, desc->count);
         to = from + to;
         if (from < blocksize && to > PAGE_CACHE_SIZE - blocksize)
                 return 0;
 
-        head = page_buffers(page);
         bh = head;
         block_start = 0;
         do {
@@ -2068,18 +2095,16 @@ int block_read_full_page(struct page *page, get_block_t *get_block)
         struct inode *inode = page->mapping->host;
         sector_t iblock, lblock;
         struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
-        unsigned int blocksize;
+        unsigned int blocksize, bbits;
         int nr, i;
         int fully_mapped = 1;
 
-        BUG_ON(!PageLocked(page));
-        blocksize = 1 << inode->i_blkbits;
-        if (!page_has_buffers(page))
-                create_empty_buffers(page, blocksize, 0);
-        head = page_buffers(page);
+        head = create_page_buffers(page, inode, 0);
+        blocksize = head->b_size;
+        bbits = block_size_bits(blocksize);
 
-        iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
-        lblock = (i_size_read(inode)+blocksize-1) >> inode->i_blkbits;
+        iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
+        lblock = (i_size_read(inode)+blocksize-1) >> bbits;
         bh = head;
         nr = 0;
         i = 0;
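Aside (illustration, not part of the patch): lblock rounds the file size up to whole blocks; adding blocksize - 1 before the shift is the standard ceiling-division idiom. A standalone check with assumed sizes:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t i_size = 10000;   /* hypothetical file size in bytes */
        unsigned int blocksize = 4096, bbits = 12;

        /* Adding blocksize-1 before shifting rounds up: 10000 bytes -> 3 blocks. */
        uint64_t lblock = (i_size + blocksize - 1) >> bbits;
        printf("%llu\n", (unsigned long long)lblock);   /* 3 */
        return 0;
}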
@@ -2864,6 +2889,55 @@ static void end_bio_bh_io_sync(struct bio *bio, int err)
         bio_put(bio);
 }
 
+/*
+ * This allows us to do IO even on the odd last sectors
+ * of a device, even if the bh block size is some multiple
+ * of the physical sector size.
+ *
+ * We'll just truncate the bio to the size of the device,
+ * and clear the end of the buffer head manually.
+ *
+ * Truly out-of-range accesses will turn into actual IO
+ * errors, this only handles the "we need to be able to
+ * do IO at the final sector" case.
+ */
+static void guard_bh_eod(int rw, struct bio *bio, struct buffer_head *bh)
+{
+        sector_t maxsector;
+        unsigned bytes;
+
+        maxsector = i_size_read(bio->bi_bdev->bd_inode) >> 9;
+        if (!maxsector)
+                return;
+
+        /*
+         * If the *whole* IO is past the end of the device,
+         * let it through, and the IO layer will turn it into
+         * an EIO.
+         */
+        if (unlikely(bio->bi_sector >= maxsector))
+                return;
+
+        maxsector -= bio->bi_sector;
+        bytes = bio->bi_size;
+        if (likely((bytes >> 9) <= maxsector))
+                return;
+
+        /* Uhhuh. We've got a bh that straddles the device size! */
+        bytes = maxsector << 9;
+
+        /* Truncate the bio.. */
+        bio->bi_size = bytes;
+        bio->bi_io_vec[0].bv_len = bytes;
+
+        /* ..and clear the end of the buffer for reads */
+        if ((rw & RW_MASK) == READ) {
+                void *kaddr = kmap_atomic(bh->b_page);
+                memset(kaddr + bh_offset(bh) + bytes, 0, bh->b_size - bytes);
+                kunmap_atomic(kaddr);
+        }
+}
+
 int submit_bh(int rw, struct buffer_head * bh)
 {
         struct bio *bio;
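Aside (illustration, not part of the patch): the arithmetic guard_bh_eod() performs, on made-up numbers: a 10240-byte device holds 20 sectors, so a 4096-byte buffer starting at sector 18 has only 2 sectors left on the device; the bio is truncated from 4096 to 1024 bytes, and the remaining 3072 bytes would be zeroed on a read:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t dev_bytes = 10240;             /* hypothetical device: 20 sectors */
        uint64_t maxsector = dev_bytes >> 9;    /* device size in 512-byte sectors */
        uint64_t bi_sector = 18;                /* bio starts 2 sectors before the end */
        unsigned int bi_size = 4096;            /* bh/bio covers 8 sectors */

        if (bi_sector < maxsector && (bi_size >> 9) > maxsector - bi_sector) {
                unsigned int bytes = (unsigned int)((maxsector - bi_sector) << 9);
                printf("truncate %u -> %u, zero last %u bytes on read\n",
                       bi_size, bytes, bi_size - bytes);   /* 4096 -> 1024, zero 3072 */
        }
        return 0;
}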
@@ -2900,6 +2974,9 @@ int submit_bh(int rw, struct buffer_head * bh)
         bio->bi_end_io = end_bio_bh_io_sync;
         bio->bi_private = bh;
 
+        /* Take care of bh's that straddle the end of the device */
+        guard_bh_eod(rw, bio, bh);
+
         bio_get(bio);
         submit_bio(rw, bio);
 
