diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-11-29 13:21:43 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-11-29 13:47:20 -0500 |
commit | 45bce8f3e3436bbe2e03dd2b076abdce79ffabb7 (patch) | |
tree | 554d8cf7254eb58210e98f2a1053f531f68f024b /fs | |
parent | 9489e9dcae718d5fde988e4a684a0f55b5f94d17 (diff) |
fs/buffer.c: make block-size be per-page and protected by the page lock
This makes the buffer size handling be a per-page thing, which allows us
to not have to worry about locking too much when changing the buffer
size. If a page doesn't have buffers, we still need to read the block
size from the inode, but we can do that with ACCESS_ONCE(), so that even
if the size is changing, we get a consistent value.
This doesn't convert all functions - many of the buffer functions are
used purely by filesystems, which in turn results in the buffer size
being fixed at mount-time. So they don't have the same consistency
issues that the raw device access can have.
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/buffer.c | 79 |
1 file changed, 48 insertions(+), 31 deletions(-)
diff --git a/fs/buffer.c b/fs/buffer.c index b5f044283edb..28a74ff5324b 100644 --- a/fs/buffer.c +++ b/fs/buffer.c | |||
@@ -1553,6 +1553,28 @@ void unmap_underlying_metadata(struct block_device *bdev, sector_t block) | |||
1553 | EXPORT_SYMBOL(unmap_underlying_metadata); | 1553 | EXPORT_SYMBOL(unmap_underlying_metadata); |
1554 | 1554 | ||
1555 | /* | 1555 | /* |
1556 | * Size is a power-of-two in the range 512..PAGE_SIZE, | ||
1557 | * and the case we care about most is PAGE_SIZE. | ||
1558 | * | ||
1559 | * So this *could* possibly be written with those | ||
1560 | * constraints in mind (relevant mostly if some | ||
1561 | * architecture has a slow bit-scan instruction) | ||
1562 | */ | ||
1563 | static inline int block_size_bits(unsigned int blocksize) | ||
1564 | { | ||
1565 | return ilog2(blocksize); | ||
1566 | } | ||
1567 | |||
1568 | static struct buffer_head *create_page_buffers(struct page *page, struct inode *inode, unsigned int b_state) | ||
1569 | { | ||
1570 | BUG_ON(!PageLocked(page)); | ||
1571 | |||
1572 | if (!page_has_buffers(page)) | ||
1573 | create_empty_buffers(page, 1 << ACCESS_ONCE(inode->i_blkbits), b_state); | ||
1574 | return page_buffers(page); | ||
1575 | } | ||
1576 | |||
1577 | /* | ||
1556 | * NOTE! All mapped/uptodate combinations are valid: | 1578 | * NOTE! All mapped/uptodate combinations are valid: |
1557 | * | 1579 | * |
1558 | * Mapped Uptodate Meaning | 1580 | * Mapped Uptodate Meaning |
@@ -1589,19 +1611,13 @@ static int __block_write_full_page(struct inode *inode, struct page *page, | |||
1589 | sector_t block; | 1611 | sector_t block; |
1590 | sector_t last_block; | 1612 | sector_t last_block; |
1591 | struct buffer_head *bh, *head; | 1613 | struct buffer_head *bh, *head; |
1592 | const unsigned blocksize = 1 << inode->i_blkbits; | 1614 | unsigned int blocksize, bbits; |
1593 | int nr_underway = 0; | 1615 | int nr_underway = 0; |
1594 | int write_op = (wbc->sync_mode == WB_SYNC_ALL ? | 1616 | int write_op = (wbc->sync_mode == WB_SYNC_ALL ? |
1595 | WRITE_SYNC : WRITE); | 1617 | WRITE_SYNC : WRITE); |
1596 | 1618 | ||
1597 | BUG_ON(!PageLocked(page)); | 1619 | head = create_page_buffers(page, inode, |
1598 | |||
1599 | last_block = (i_size_read(inode) - 1) >> inode->i_blkbits; | ||
1600 | |||
1601 | if (!page_has_buffers(page)) { | ||
1602 | create_empty_buffers(page, blocksize, | ||
1603 | (1 << BH_Dirty)|(1 << BH_Uptodate)); | 1620 | (1 << BH_Dirty)|(1 << BH_Uptodate)); |
1604 | } | ||
1605 | 1621 | ||
1606 | /* | 1622 | /* |
1607 | * Be very careful. We have no exclusion from __set_page_dirty_buffers | 1623 | * Be very careful. We have no exclusion from __set_page_dirty_buffers |
@@ -1613,9 +1629,12 @@ static int __block_write_full_page(struct inode *inode, struct page *page, | |||
1613 | * handle that here by just cleaning them. | 1629 | * handle that here by just cleaning them. |
1614 | */ | 1630 | */ |
1615 | 1631 | ||
1616 | block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
1617 | head = page_buffers(page); | ||
1618 | bh = head; | 1632 | bh = head; |
1633 | blocksize = bh->b_size; | ||
1634 | bbits = block_size_bits(blocksize); | ||
1635 | |||
1636 | block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits); | ||
1637 | last_block = (i_size_read(inode) - 1) >> bbits; | ||
1619 | 1638 | ||
1620 | /* | 1639 | /* |
1621 | * Get all the dirty buffers mapped to disk addresses and | 1640 | * Get all the dirty buffers mapped to disk addresses and |
@@ -1806,12 +1825,10 @@ int __block_write_begin(struct page *page, loff_t pos, unsigned len, | |||
1806 | BUG_ON(to > PAGE_CACHE_SIZE); | 1825 | BUG_ON(to > PAGE_CACHE_SIZE); |
1807 | BUG_ON(from > to); | 1826 | BUG_ON(from > to); |
1808 | 1827 | ||
1809 | blocksize = 1 << inode->i_blkbits; | 1828 | head = create_page_buffers(page, inode, 0); |
1810 | if (!page_has_buffers(page)) | 1829 | blocksize = head->b_size; |
1811 | create_empty_buffers(page, blocksize, 0); | 1830 | bbits = block_size_bits(blocksize); |
1812 | head = page_buffers(page); | ||
1813 | 1831 | ||
1814 | bbits = inode->i_blkbits; | ||
1815 | block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits); | 1832 | block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits); |
1816 | 1833 | ||
1817 | for(bh = head, block_start = 0; bh != head || !block_start; | 1834 | for(bh = head, block_start = 0; bh != head || !block_start; |
@@ -1881,11 +1898,11 @@ static int __block_commit_write(struct inode *inode, struct page *page, | |||
1881 | unsigned blocksize; | 1898 | unsigned blocksize; |
1882 | struct buffer_head *bh, *head; | 1899 | struct buffer_head *bh, *head; |
1883 | 1900 | ||
1884 | blocksize = 1 << inode->i_blkbits; | 1901 | bh = head = page_buffers(page); |
1902 | blocksize = bh->b_size; | ||
1885 | 1903 | ||
1886 | for(bh = head = page_buffers(page), block_start = 0; | 1904 | block_start = 0; |
1887 | bh != head || !block_start; | 1905 | do { |
1888 | block_start=block_end, bh = bh->b_this_page) { | ||
1889 | block_end = block_start + blocksize; | 1906 | block_end = block_start + blocksize; |
1890 | if (block_end <= from || block_start >= to) { | 1907 | if (block_end <= from || block_start >= to) { |
1891 | if (!buffer_uptodate(bh)) | 1908 | if (!buffer_uptodate(bh)) |
@@ -1895,7 +1912,10 @@ static int __block_commit_write(struct inode *inode, struct page *page, | |||
1895 | mark_buffer_dirty(bh); | 1912 | mark_buffer_dirty(bh); |
1896 | } | 1913 | } |
1897 | clear_buffer_new(bh); | 1914 | clear_buffer_new(bh); |
1898 | } | 1915 | |
1916 | block_start = block_end; | ||
1917 | bh = bh->b_this_page; | ||
1918 | } while (bh != head); | ||
1899 | 1919 | ||
1900 | /* | 1920 | /* |
1901 | * If this is a partial write which happened to make all buffers | 1921 | * If this is a partial write which happened to make all buffers |
@@ -2020,7 +2040,6 @@ EXPORT_SYMBOL(generic_write_end); | |||
2020 | int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc, | 2040 | int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc, |
2021 | unsigned long from) | 2041 | unsigned long from) |
2022 | { | 2042 | { |
2023 | struct inode *inode = page->mapping->host; | ||
2024 | unsigned block_start, block_end, blocksize; | 2043 | unsigned block_start, block_end, blocksize; |
2025 | unsigned to; | 2044 | unsigned to; |
2026 | struct buffer_head *bh, *head; | 2045 | struct buffer_head *bh, *head; |
@@ -2029,13 +2048,13 @@ int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc, | |||
2029 | if (!page_has_buffers(page)) | 2048 | if (!page_has_buffers(page)) |
2030 | return 0; | 2049 | return 0; |
2031 | 2050 | ||
2032 | blocksize = 1 << inode->i_blkbits; | 2051 | head = page_buffers(page); |
2052 | blocksize = head->b_size; | ||
2033 | to = min_t(unsigned, PAGE_CACHE_SIZE - from, desc->count); | 2053 | to = min_t(unsigned, PAGE_CACHE_SIZE - from, desc->count); |
2034 | to = from + to; | 2054 | to = from + to; |
2035 | if (from < blocksize && to > PAGE_CACHE_SIZE - blocksize) | 2055 | if (from < blocksize && to > PAGE_CACHE_SIZE - blocksize) |
2036 | return 0; | 2056 | return 0; |
2037 | 2057 | ||
2038 | head = page_buffers(page); | ||
2039 | bh = head; | 2058 | bh = head; |
2040 | block_start = 0; | 2059 | block_start = 0; |
2041 | do { | 2060 | do { |
@@ -2068,18 +2087,16 @@ int block_read_full_page(struct page *page, get_block_t *get_block) | |||
2068 | struct inode *inode = page->mapping->host; | 2087 | struct inode *inode = page->mapping->host; |
2069 | sector_t iblock, lblock; | 2088 | sector_t iblock, lblock; |
2070 | struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE]; | 2089 | struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE]; |
2071 | unsigned int blocksize; | 2090 | unsigned int blocksize, bbits; |
2072 | int nr, i; | 2091 | int nr, i; |
2073 | int fully_mapped = 1; | 2092 | int fully_mapped = 1; |
2074 | 2093 | ||
2075 | BUG_ON(!PageLocked(page)); | 2094 | head = create_page_buffers(page, inode, 0); |
2076 | blocksize = 1 << inode->i_blkbits; | 2095 | blocksize = head->b_size; |
2077 | if (!page_has_buffers(page)) | 2096 | bbits = block_size_bits(blocksize); |
2078 | create_empty_buffers(page, blocksize, 0); | ||
2079 | head = page_buffers(page); | ||
2080 | 2097 | ||
2081 | iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); | 2098 | iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits); |
2082 | lblock = (i_size_read(inode)+blocksize-1) >> inode->i_blkbits; | 2099 | lblock = (i_size_read(inode)+blocksize-1) >> bbits; |
2083 | bh = head; | 2100 | bh = head; |
2084 | nr = 0; | 2101 | nr = 0; |
2085 | i = 0; | 2102 | i = 0; |