Diffstat (limited to 'fs/buffer.c')
 fs/buffer.c | 249 +++++++------------------
 1 file changed, 73 insertions(+), 176 deletions(-)
diff --git a/fs/buffer.c b/fs/buffer.c
index d54812b198e9..3e7dca279d1c 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -770,11 +770,12 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
 				spin_unlock(lock);
 				/*
 				 * Ensure any pending I/O completes so that
-				 * ll_rw_block() actually writes the current
-				 * contents - it is a noop if I/O is still in
-				 * flight on potentially older contents.
+				 * write_dirty_buffer() actually writes the
+				 * current contents - it is a noop if I/O is
+				 * still in flight on potentially older
+				 * contents.
 				 */
-				ll_rw_block(SWRITE_SYNC_PLUG, 1, &bh);
+				write_dirty_buffer(bh, WRITE_SYNC_PLUG);
 
 				/*
 				 * Kick off IO for the previous mapping. Note
@@ -1833,9 +1834,10 @@ void page_zero_new_buffers(struct page *page, unsigned from, unsigned to)
 }
 EXPORT_SYMBOL(page_zero_new_buffers);
 
-static int __block_prepare_write(struct inode *inode, struct page *page,
-		unsigned from, unsigned to, get_block_t *get_block)
+int block_prepare_write(struct page *page, unsigned from, unsigned to,
+		get_block_t *get_block)
 {
+	struct inode *inode = page->mapping->host;
 	unsigned block_start, block_end;
 	sector_t block;
 	int err = 0;
@@ -1908,10 +1910,13 @@ static int __block_prepare_write(struct inode *inode, struct page *page,
 		if (!buffer_uptodate(*wait_bh))
 			err = -EIO;
 	}
-	if (unlikely(err))
+	if (unlikely(err)) {
 		page_zero_new_buffers(page, from, to);
+		ClearPageUptodate(page);
+	}
 	return err;
 }
+EXPORT_SYMBOL(block_prepare_write);
 
 static int __block_commit_write(struct inode *inode, struct page *page,
 		unsigned from, unsigned to)
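
Note: block_prepare_write() is now exported for direct use and derives the inode from page->mapping->host rather than taking it as a parameter. A minimal caller sketch (the myfs_* names and the whole-page range are hypothetical illustrations, not part of this patch; the page must already be locked):

	#include <linux/fs.h>
	#include <linux/pagemap.h>
	#include <linux/buffer_head.h>

	/* hypothetical get_block callback, defined elsewhere in the filesystem */
	extern int myfs_get_block(struct inode *inode, sector_t iblock,
				  struct buffer_head *bh_result, int create);

	/* prepare a whole locked page for writing, e.g. on a ->page_mkwrite() path */
	static int myfs_prepare_page(struct page *page)
	{
		BUG_ON(!PageLocked(page));
		return block_prepare_write(page, 0, PAGE_CACHE_SIZE,
					   myfs_get_block);
	}
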
@@ -1948,90 +1953,41 @@ static int __block_commit_write(struct inode *inode, struct page *page,
 	return 0;
 }
 
-/*
- * Filesystems implementing the new truncate sequence should use the
- * _newtrunc postfix variant which won't incorrectly call vmtruncate.
- * The filesystem needs to handle block truncation upon failure.
- */
-int block_write_begin_newtrunc(struct file *file, struct address_space *mapping,
-			loff_t pos, unsigned len, unsigned flags,
-			struct page **pagep, void **fsdata,
-			get_block_t *get_block)
+int __block_write_begin(struct page *page, loff_t pos, unsigned len,
+		get_block_t *get_block)
 {
-	struct inode *inode = mapping->host;
-	int status = 0;
-	struct page *page;
-	pgoff_t index;
-	unsigned start, end;
-	int ownpage = 0;
+	unsigned start = pos & (PAGE_CACHE_SIZE - 1);
 
-	index = pos >> PAGE_CACHE_SHIFT;
-	start = pos & (PAGE_CACHE_SIZE - 1);
-	end = start + len;
-
-	page = *pagep;
-	if (page == NULL) {
-		ownpage = 1;
-		page = grab_cache_page_write_begin(mapping, index, flags);
-		if (!page) {
-			status = -ENOMEM;
-			goto out;
-		}
-		*pagep = page;
-	} else
-		BUG_ON(!PageLocked(page));
-
-	status = __block_prepare_write(inode, page, start, end, get_block);
-	if (unlikely(status)) {
-		ClearPageUptodate(page);
-
-		if (ownpage) {
-			unlock_page(page);
-			page_cache_release(page);
-			*pagep = NULL;
-		}
-	}
-
-out:
-	return status;
+	return block_prepare_write(page, start, start + len, get_block);
 }
-EXPORT_SYMBOL(block_write_begin_newtrunc);
+EXPORT_SYMBOL(__block_write_begin);
 
 /*
  * block_write_begin takes care of the basic task of block allocation and
  * bringing partial write blocks uptodate first.
  *
- * If *pagep is not NULL, then block_write_begin uses the locked page
- * at *pagep rather than allocating its own. In this case, the page will
- * not be unlocked or deallocated on failure.
+ * The filesystem needs to handle block truncation upon failure.
  */
-int block_write_begin(struct file *file, struct address_space *mapping,
-			loff_t pos, unsigned len, unsigned flags,
-			struct page **pagep, void **fsdata,
-			get_block_t *get_block)
+int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
+		unsigned flags, struct page **pagep, get_block_t *get_block)
 {
-	int ret;
+	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+	struct page *page;
+	int status;
 
-	ret = block_write_begin_newtrunc(file, mapping, pos, len, flags,
-					pagep, fsdata, get_block);
+	page = grab_cache_page_write_begin(mapping, index, flags);
+	if (!page)
+		return -ENOMEM;
 
-	/*
-	 * prepare_write() may have instantiated a few blocks
-	 * outside i_size. Trim these off again. Don't need
-	 * i_size_read because we hold i_mutex.
-	 *
-	 * Filesystems which pass down their own page also cannot
-	 * call into vmtruncate here because it would lead to lock
-	 * inversion problems (*pagep is locked). This is a further
-	 * example of where the old truncate sequence is inadequate.
-	 */
-	if (unlikely(ret) && *pagep == NULL) {
-		loff_t isize = mapping->host->i_size;
-		if (pos + len > isize)
-			vmtruncate(mapping->host, isize);
-	}
+	status = __block_write_begin(page, pos, len, get_block);
+	if (unlikely(status)) {
+		unlock_page(page);
+		page_cache_release(page);
+		page = NULL;
+	}
 
-	return ret;
+	*pagep = page;
+	return status;
 }
 EXPORT_SYMBOL(block_write_begin);
 
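Note the contract change: block_write_begin() now always allocates the page itself and, like the removed _newtrunc variant, no longer calls vmtruncate() on failure. A hedged sketch of a converted ->write_begin (myfs_* is hypothetical; the explicit vmtruncate() cleanup reproduces what the removed wrapper used to do on the caller's behalf):

	#include <linux/fs.h>
	#include <linux/mm.h>
	#include <linux/buffer_head.h>

	extern int myfs_get_block(struct inode *inode, sector_t iblock,
				  struct buffer_head *bh_result, int create);

	static int myfs_write_begin(struct file *file, struct address_space *mapping,
				    loff_t pos, unsigned len, unsigned flags,
				    struct page **pagep, void **fsdata)
	{
		int ret;

		/* no need to set *pagep = NULL first any more */
		ret = block_write_begin(mapping, pos, len, flags, pagep,
					myfs_get_block);
		if (unlikely(ret)) {
			/* trim blocks instantiated past i_size; the helper
			 * no longer does this for us */
			loff_t isize = mapping->host->i_size;
			if (pos + len > isize)
				vmtruncate(mapping->host, isize);
		}
		return ret;
	}
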
@@ -2351,7 +2307,7 @@ out:
  * For moronic filesystems that do not allow holes in file.
  * We may have to extend the file.
  */
-int cont_write_begin_newtrunc(struct file *file, struct address_space *mapping,
+int cont_write_begin(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned flags,
 			struct page **pagep, void **fsdata,
 			get_block_t *get_block, loff_t *bytes)
@@ -2363,7 +2319,7 @@ int cont_write_begin_newtrunc(struct file *file, struct address_space *mapping,
 
 	err = cont_expand_zero(file, mapping, pos, bytes);
 	if (err)
-		goto out;
+		return err;
 
 	zerofrom = *bytes & ~PAGE_CACHE_MASK;
 	if (pos+len > *bytes && zerofrom & (blocksize-1)) {
@@ -2371,44 +2327,10 @@ int cont_write_begin_newtrunc(struct file *file, struct address_space *mapping,
 		(*bytes)++;
 	}
 
-	*pagep = NULL;
-	err = block_write_begin_newtrunc(file, mapping, pos, len,
-				flags, pagep, fsdata, get_block);
-out:
-	return err;
-}
-EXPORT_SYMBOL(cont_write_begin_newtrunc);
-
-int cont_write_begin(struct file *file, struct address_space *mapping,
-			loff_t pos, unsigned len, unsigned flags,
-			struct page **pagep, void **fsdata,
-			get_block_t *get_block, loff_t *bytes)
-{
-	int ret;
-
-	ret = cont_write_begin_newtrunc(file, mapping, pos, len, flags,
-				pagep, fsdata, get_block, bytes);
-	if (unlikely(ret)) {
-		loff_t isize = mapping->host->i_size;
-		if (pos + len > isize)
-			vmtruncate(mapping->host, isize);
-	}
-
-	return ret;
+	return block_write_begin(mapping, pos, len, flags, pagep, get_block);
 }
 EXPORT_SYMBOL(cont_write_begin);
 
-int block_prepare_write(struct page *page, unsigned from, unsigned to,
-			get_block_t *get_block)
-{
-	struct inode *inode = page->mapping->host;
-	int err = __block_prepare_write(inode, page, from, to, get_block);
-	if (err)
-		ClearPageUptodate(page);
-	return err;
-}
-EXPORT_SYMBOL(block_prepare_write);
-
 int block_commit_write(struct page *page, unsigned from, unsigned to)
 {
 	struct inode *inode = page->mapping->host;
@@ -2510,11 +2432,11 @@ static void attach_nobh_buffers(struct page *page, struct buffer_head *head)
 }
 
 /*
- * Filesystems implementing the new truncate sequence should use the
- * _newtrunc postfix variant which won't incorrectly call vmtruncate.
+ * On entry, the page is fully not uptodate.
+ * On exit the page is fully uptodate in the areas outside (from,to)
  * The filesystem needs to handle block truncation upon failure.
  */
-int nobh_write_begin_newtrunc(struct file *file, struct address_space *mapping,
+int nobh_write_begin(struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned flags,
 			struct page **pagep, void **fsdata,
 			get_block_t *get_block)
@@ -2547,8 +2469,8 @@ int nobh_write_begin_newtrunc(struct file *file, struct address_space *mapping,
 		unlock_page(page);
 		page_cache_release(page);
 		*pagep = NULL;
-		return block_write_begin_newtrunc(file, mapping, pos, len,
-					flags, pagep, fsdata, get_block);
+		return block_write_begin(mapping, pos, len, flags, pagep,
+					 get_block);
 	}
 
 	if (PageMappedToDisk(page))
@@ -2654,35 +2576,6 @@ out_release:
 
 	return ret;
 }
-EXPORT_SYMBOL(nobh_write_begin_newtrunc);
-
-/*
- * On entry, the page is fully not uptodate.
- * On exit the page is fully uptodate in the areas outside (from,to)
- */
-int nobh_write_begin(struct file *file, struct address_space *mapping,
-			loff_t pos, unsigned len, unsigned flags,
-			struct page **pagep, void **fsdata,
-			get_block_t *get_block)
-{
-	int ret;
-
-	ret = nobh_write_begin_newtrunc(file, mapping, pos, len, flags,
-				pagep, fsdata, get_block);
-
-	/*
-	 * prepare_write() may have instantiated a few blocks
-	 * outside i_size. Trim these off again. Don't need
-	 * i_size_read because we hold i_mutex.
-	 */
-	if (unlikely(ret)) {
-		loff_t isize = mapping->host->i_size;
-		if (pos + len > isize)
-			vmtruncate(mapping->host, isize);
-	}
-
-	return ret;
-}
 EXPORT_SYMBOL(nobh_write_begin);
 
 int nobh_write_end(struct file *file, struct address_space *mapping,
@@ -3020,13 +2913,6 @@ int submit_bh(int rw, struct buffer_head * bh)
 	BUG_ON(buffer_unwritten(bh));
 
 	/*
-	 * Mask in barrier bit for a write (could be either a WRITE or a
-	 * WRITE_SYNC
-	 */
-	if (buffer_ordered(bh) && (rw & WRITE))
-		rw |= WRITE_BARRIER;
-
-	/*
 	 * Only clear out a write error when rewriting
 	 */
 	if (test_set_buffer_req(bh) && (rw & WRITE))
@@ -3064,22 +2950,21 @@ EXPORT_SYMBOL(submit_bh);
 
 /**
  * ll_rw_block: low-level access to block devices (DEPRECATED)
- * @rw: whether to %READ or %WRITE or %SWRITE or maybe %READA (readahead)
+ * @rw: whether to %READ or %WRITE or maybe %READA (readahead)
  * @nr: number of &struct buffer_heads in the array
  * @bhs: array of pointers to &struct buffer_head
  *
  * ll_rw_block() takes an array of pointers to &struct buffer_heads, and
  * requests an I/O operation on them, either a %READ or a %WRITE. The third
- * %SWRITE is like %WRITE only we make sure that the *current* data in buffers
- * are sent to disk. The fourth %READA option is described in the documentation
- * for generic_make_request() which ll_rw_block() calls.
+ * %READA option is described in the documentation for generic_make_request()
+ * which ll_rw_block() calls.
  *
  * This function drops any buffer that it cannot get a lock on (with the
- * BH_Lock state bit) unless SWRITE is required, any buffer that appears to be
- * clean when doing a write request, and any buffer that appears to be
- * up-to-date when doing read request. Further it marks as clean buffers that
- * are processed for writing (the buffer cache won't assume that they are
- * actually clean until the buffer gets unlocked).
+ * BH_Lock state bit), any buffer that appears to be clean when doing a write
+ * request, and any buffer that appears to be up-to-date when doing read
+ * request. Further it marks as clean buffers that are processed for
+ * writing (the buffer cache won't assume that they are actually clean
+ * until the buffer gets unlocked).
  *
  * ll_rw_block sets b_end_io to simple completion handler that marks
  * the buffer up-to-date (if approriate), unlocks the buffer and wakes
@@ -3095,20 +2980,13 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[])
 	for (i = 0; i < nr; i++) {
 		struct buffer_head *bh = bhs[i];
 
-		if (rw == SWRITE || rw == SWRITE_SYNC || rw == SWRITE_SYNC_PLUG)
-			lock_buffer(bh);
-		else if (!trylock_buffer(bh))
+		if (!trylock_buffer(bh))
 			continue;
-
-		if (rw == WRITE || rw == SWRITE || rw == SWRITE_SYNC ||
-		    rw == SWRITE_SYNC_PLUG) {
+		if (rw == WRITE) {
 			if (test_clear_buffer_dirty(bh)) {
 				bh->b_end_io = end_buffer_write_sync;
 				get_bh(bh);
-				if (rw == SWRITE_SYNC)
-					submit_bh(WRITE_SYNC, bh);
-				else
-					submit_bh(WRITE, bh);
+				submit_bh(WRITE, bh);
 				continue;
 			}
 		} else {
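
With the SWRITE* request types gone, ll_rw_block() only ever trylocks and skips busy buffers; callers that relied on SWRITE's lock-and-write-the-current-contents semantics switch to the write_dirty_buffer() helper introduced below. Roughly (hypothetical wrapper, shown only to illustrate the conversion):

	#include <linux/buffer_head.h>

	static void myfs_flush_bh(struct buffer_head *bh)
	{
		/* was: ll_rw_block(SWRITE_SYNC_PLUG, 1, &bh); */
		write_dirty_buffer(bh, WRITE_SYNC_PLUG);
	}
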
@@ -3124,12 +3002,25 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[])
 }
 EXPORT_SYMBOL(ll_rw_block);
 
+void write_dirty_buffer(struct buffer_head *bh, int rw)
+{
+	lock_buffer(bh);
+	if (!test_clear_buffer_dirty(bh)) {
+		unlock_buffer(bh);
+		return;
+	}
+	bh->b_end_io = end_buffer_write_sync;
+	get_bh(bh);
+	submit_bh(rw, bh);
+}
+EXPORT_SYMBOL(write_dirty_buffer);
+
 /*
  * For a data-integrity writeout, we need to wait upon any in-progress I/O
  * and then start new I/O and then wait upon it. The caller must have a ref on
  * the buffer_head.
  */
-int sync_dirty_buffer(struct buffer_head *bh)
+int __sync_dirty_buffer(struct buffer_head *bh, int rw)
 {
 	int ret = 0;
 
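write_dirty_buffer() submits the write and returns without waiting, which is what lets fsync_buffers_list() in the first hunk overlap I/O with other work. A sketch of that fire-and-forget pattern (hypothetical function; the caller holds its own reference so the buffer_head stays valid until wait_on_buffer()):

	#include <linux/buffer_head.h>

	static int myfs_write_bh_and_wait(struct buffer_head *bh)
	{
		int err;

		get_bh(bh);			/* pin bh across the I/O */
		write_dirty_buffer(bh, WRITE);	/* no-op if bh is not dirty */

		/* ...other work may overlap with the write here... */

		wait_on_buffer(bh);		/* buffer unlocks at I/O completion */
		err = buffer_uptodate(bh) ? 0 : -EIO;
		brelse(bh);
		return err;
	}
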
@@ -3138,7 +3029,7 @@ int sync_dirty_buffer(struct buffer_head *bh)
 	if (test_clear_buffer_dirty(bh)) {
 		get_bh(bh);
 		bh->b_end_io = end_buffer_write_sync;
-		ret = submit_bh(WRITE_SYNC, bh);
+		ret = submit_bh(rw, bh);
 		wait_on_buffer(bh);
 		if (buffer_eopnotsupp(bh)) {
 			clear_buffer_eopnotsupp(bh);
@@ -3151,6 +3042,12 @@ int sync_dirty_buffer(struct buffer_head *bh)
 	}
 	return ret;
 }
+EXPORT_SYMBOL(__sync_dirty_buffer);
+
+int sync_dirty_buffer(struct buffer_head *bh)
+{
+	return __sync_dirty_buffer(bh, WRITE_SYNC);
+}
 EXPORT_SYMBOL(sync_dirty_buffer);
 
 /*
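
sync_dirty_buffer() keeps its data-integrity semantics as a thin wrapper passing WRITE_SYNC; callers that want the same wait-write-wait behaviour without the sync queue hint can now pass plain WRITE through __sync_dirty_buffer(). For instance (hypothetical caller):

	#include <linux/buffer_head.h>

	static int myfs_checkpoint_bh(struct buffer_head *bh)
	{
		/* sync_dirty_buffer() semantics, minus the WRITE_SYNC hint */
		return __sync_dirty_buffer(bh, WRITE);
	}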