path: root/fs/buffer.c
author     Nick Piggin <npiggin@suse.de>                          2007-10-16 04:25:01 -0400
committer  Linus Torvalds <torvalds@woody.linux-foundation.org>   2007-10-16 12:42:55 -0400
commit     afddba49d18f346e5cc2938b6ed7c512db18ca68 (patch)
tree       4726e3d3b0e9e8e5b5d3b2b0cccb36446bbdf3ca /fs/buffer.c
parent     637aff46f94a754207c80c8c64bf1b74f24b967d (diff)
fs: introduce write_begin, write_end, and perform_write aops
These are intended to replace prepare_write and commit_write with more
flexible alternatives that are also able to avoid the buffered write
deadlock problems efficiently (which prepare_write is unable to do).

[mark.fasheh@oracle.com: API design contributions, code review and fixes]
[akpm@linux-foundation.org: various fixes]
[dmonakhov@sw.ru: new aop block_write_begin fix]
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
Signed-off-by: Dmitriy Monakhov <dmonakhov@openvz.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
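For context, the pattern a filesystem follows to adopt the new hooks is small: its
->write_begin can simply forward to the block_write_begin() helper added in this patch,
and its ->write_end can be generic_write_end(). The sketch below is illustrative only and
is not part of the patch; the myfs_* names and myfs_get_block are hypothetical stand-ins
for a real filesystem's callbacks.

    #include <linux/fs.h>
    #include <linux/buffer_head.h>

    /*
     * Illustrative sketch only (not from this patch): minimal adoption of
     * the new write_begin/write_end aops by a block-based filesystem.
     * "myfs_get_block" is a hypothetical get_block_t implementation.
     */
    static int myfs_write_begin(struct file *file, struct address_space *mapping,
    			loff_t pos, unsigned len, unsigned flags,
    			struct page **pagep, void **fsdata)
    {
    	*pagep = NULL;	/* let block_write_begin() find and lock the page */
    	return block_write_begin(file, mapping, pos, len, flags,
    				 pagep, fsdata, myfs_get_block);
    }

    static const struct address_space_operations myfs_aops = {
    	/* readpage, writepage, etc. stay as before */
    	.write_begin	= myfs_write_begin,
    	.write_end	= generic_write_end,
    };

On a short copy from userspace, block_write_end() treats a write into a page that is not
uptodate as zero-length (see the comment in the new code below), so the generic write path
retries the remainder instead of exposing uninitialised data.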
Diffstat (limited to 'fs/buffer.c')
-rw-r--r--	fs/buffer.c	201
1 file changed, 169 insertions(+), 32 deletions(-)
diff --git a/fs/buffer.c b/fs/buffer.c
index 9ece6c2086d0..68b8fbdc1b28 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1770,6 +1770,48 @@ recover:
 	goto done;
 }
 
+/*
+ * If a page has any new buffers, zero them out here, and mark them uptodate
+ * and dirty so they'll be written out (in order to prevent uninitialised
+ * block data from leaking). And clear the new bit.
+ */
+void page_zero_new_buffers(struct page *page, unsigned from, unsigned to)
+{
+	unsigned int block_start, block_end;
+	struct buffer_head *head, *bh;
+
+	BUG_ON(!PageLocked(page));
+	if (!page_has_buffers(page))
+		return;
+
+	bh = head = page_buffers(page);
+	block_start = 0;
+	do {
+		block_end = block_start + bh->b_size;
+
+		if (buffer_new(bh)) {
+			if (block_end > from && block_start < to) {
+				if (!PageUptodate(page)) {
+					unsigned start, size;
+
+					start = max(from, block_start);
+					size = min(to, block_end) - start;
+
+					zero_user_page(page, start, size, KM_USER0);
+					set_buffer_uptodate(bh);
+				}
+
+				clear_buffer_new(bh);
+				mark_buffer_dirty(bh);
+			}
+		}
+
+		block_start = block_end;
+		bh = bh->b_this_page;
+	} while (bh != head);
+}
+EXPORT_SYMBOL(page_zero_new_buffers);
+
 static int __block_prepare_write(struct inode *inode, struct page *page,
 		unsigned from, unsigned to, get_block_t *get_block)
 {
@@ -1854,38 +1896,8 @@ static int __block_prepare_write(struct inode *inode, struct page *page,
 		if (!buffer_uptodate(*wait_bh))
 			err = -EIO;
 	}
-	if (!err) {
-		bh = head;
-		do {
-			if (buffer_new(bh))
-				clear_buffer_new(bh);
-		} while ((bh = bh->b_this_page) != head);
-		return 0;
-	}
-	/* Error case: */
-	/*
-	 * Zero out any newly allocated blocks to avoid exposing stale
-	 * data. If BH_New is set, we know that the block was newly
-	 * allocated in the above loop.
-	 */
-	bh = head;
-	block_start = 0;
-	do {
-		block_end = block_start+blocksize;
-		if (block_end <= from)
-			goto next_bh;
-		if (block_start >= to)
-			break;
-		if (buffer_new(bh)) {
-			clear_buffer_new(bh);
-			zero_user_page(page, block_start, bh->b_size, KM_USER0);
-			set_buffer_uptodate(bh);
-			mark_buffer_dirty(bh);
-		}
-next_bh:
-		block_start = block_end;
-		bh = bh->b_this_page;
-	} while (bh != head);
+	if (unlikely(err))
+		page_zero_new_buffers(page, from, to);
 	return err;
 }
 
@@ -1910,6 +1922,7 @@ static int __block_commit_write(struct inode *inode, struct page *page,
 			set_buffer_uptodate(bh);
 			mark_buffer_dirty(bh);
 		}
+		clear_buffer_new(bh);
 	}
 
 	/*
@@ -1924,6 +1937,130 @@ static int __block_commit_write(struct inode *inode, struct page *page,
 }
 
 /*
+ * block_write_begin takes care of the basic task of block allocation and
+ * bringing partial write blocks uptodate first.
+ *
+ * If *pagep is not NULL, then block_write_begin uses the locked page
+ * at *pagep rather than allocating its own. In this case, the page will
+ * not be unlocked or deallocated on failure.
+ */
+int block_write_begin(struct file *file, struct address_space *mapping,
+			loff_t pos, unsigned len, unsigned flags,
+			struct page **pagep, void **fsdata,
+			get_block_t *get_block)
+{
+	struct inode *inode = mapping->host;
+	int status = 0;
+	struct page *page;
+	pgoff_t index;
+	unsigned start, end;
+	int ownpage = 0;
+
+	index = pos >> PAGE_CACHE_SHIFT;
+	start = pos & (PAGE_CACHE_SIZE - 1);
+	end = start + len;
+
+	page = *pagep;
+	if (page == NULL) {
+		ownpage = 1;
+		page = __grab_cache_page(mapping, index);
+		if (!page) {
+			status = -ENOMEM;
+			goto out;
+		}
+		*pagep = page;
+	} else
+		BUG_ON(!PageLocked(page));
+
+	status = __block_prepare_write(inode, page, start, end, get_block);
+	if (unlikely(status)) {
+		ClearPageUptodate(page);
+
+		if (ownpage) {
+			unlock_page(page);
+			page_cache_release(page);
+			*pagep = NULL;
+
+			/*
+			 * prepare_write() may have instantiated a few blocks
+			 * outside i_size. Trim these off again. Don't need
+			 * i_size_read because we hold i_mutex.
+			 */
+			if (pos + len > inode->i_size)
+				vmtruncate(inode, inode->i_size);
+		}
+		goto out;
+	}
+
+out:
+	return status;
+}
+EXPORT_SYMBOL(block_write_begin);
+
+int block_write_end(struct file *file, struct address_space *mapping,
+			loff_t pos, unsigned len, unsigned copied,
+			struct page *page, void *fsdata)
+{
+	struct inode *inode = mapping->host;
+	unsigned start;
+
+	start = pos & (PAGE_CACHE_SIZE - 1);
+
+	if (unlikely(copied < len)) {
+		/*
+		 * The buffers that were written will now be uptodate, so we
+		 * don't have to worry about a readpage reading them and
+		 * overwriting a partial write. However if we have encountered
+		 * a short write and only partially written into a buffer, it
+		 * will not be marked uptodate, so a readpage might come in and
+		 * destroy our partial write.
+		 *
+		 * Do the simplest thing, and just treat any short write to a
+		 * non uptodate page as a zero-length write, and force the
+		 * caller to redo the whole thing.
+		 */
+		if (!PageUptodate(page))
+			copied = 0;
+
+		page_zero_new_buffers(page, start+copied, start+len);
+	}
+	flush_dcache_page(page);
+
+	/* This could be a short (even 0-length) commit */
+	__block_commit_write(inode, page, start, start+copied);
+
+	return copied;
+}
+EXPORT_SYMBOL(block_write_end);
+
+int generic_write_end(struct file *file, struct address_space *mapping,
+			loff_t pos, unsigned len, unsigned copied,
+			struct page *page, void *fsdata)
+{
+	struct inode *inode = mapping->host;
+
+	copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
+
+	/*
+	 * No need to use i_size_read() here, the i_size
+	 * cannot change under us because we hold i_mutex.
+	 *
+	 * But it's important to update i_size while still holding page lock:
+	 * page writeout could otherwise come in and zero beyond i_size.
+	 */
+	if (pos+copied > inode->i_size) {
+		i_size_write(inode, pos+copied);
+		mark_inode_dirty(inode);
+	}
+
+	unlock_page(page);
+	page_cache_release(page);
+
+	return copied;
+}
+EXPORT_SYMBOL(generic_write_end);
+
+/*
  * Generic "read page" function for block devices that have the normal
  * get_block functionality. This is most of the block device filesystems.
  * Reads the page asynchronously --- the unlock_buffer() and