Diffstat (limited to 'fs/buffer.c')
-rw-r--r--  fs/buffer.c  265
1 file changed, 74 insertions(+), 191 deletions(-)
diff --git a/fs/buffer.c b/fs/buffer.c
index d54812b198e9..5930e382959b 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -156,7 +156,7 @@ void end_buffer_write_sync(struct buffer_head *bh, int uptodate)
 	if (uptodate) {
 		set_buffer_uptodate(bh);
 	} else {
-		if (!buffer_eopnotsupp(bh) && !quiet_error(bh)) {
+		if (!quiet_error(bh)) {
 			buffer_io_error(bh);
 			printk(KERN_WARNING "lost page write due to "
 					"I/O error on %s\n",
@@ -770,11 +770,12 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
 				spin_unlock(lock);
 				/*
 				 * Ensure any pending I/O completes so that
-				 * ll_rw_block() actually writes the current
-				 * contents - it is a noop if I/O is still in
-				 * flight on potentially older contents.
+				 * write_dirty_buffer() actually writes the
+				 * current contents - it is a noop if I/O is
+				 * still in flight on potentially older
+				 * contents.
 				 */
-				ll_rw_block(SWRITE_SYNC_PLUG, 1, &bh);
+				write_dirty_buffer(bh, WRITE_SYNC_PLUG);
 
 				/*
 				 * Kick off IO for the previous mapping. Note
@@ -904,7 +905,6 @@ try_again:
 
 	bh->b_state = 0;
 	atomic_set(&bh->b_count, 0);
-	bh->b_private = NULL;
 	bh->b_size = size;
 
 	/* Link the buffer to its page */
@@ -1705,7 +1705,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
 	 * and kswapd activity, but those code paths have their own
 	 * higher-level throttling.
 	 */
-	if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) {
+	if (wbc->sync_mode != WB_SYNC_NONE) {
 		lock_buffer(bh);
 	} else if (!trylock_buffer(bh)) {
 		redirty_page_for_writepage(wbc, page);
@@ -1833,9 +1833,12 @@ void page_zero_new_buffers(struct page *page, unsigned from, unsigned to)
 }
 EXPORT_SYMBOL(page_zero_new_buffers);
 
-static int __block_prepare_write(struct inode *inode, struct page *page,
-		unsigned from, unsigned to, get_block_t *get_block)
+int __block_write_begin(struct page *page, loff_t pos, unsigned len,
+		get_block_t *get_block)
 {
+	unsigned from = pos & (PAGE_CACHE_SIZE - 1);
+	unsigned to = from + len;
+	struct inode *inode = page->mapping->host;
 	unsigned block_start, block_end;
 	sector_t block;
 	int err = 0;
@@ -1908,10 +1911,13 @@ static int __block_prepare_write(struct inode *inode, struct page *page,
 		if (!buffer_uptodate(*wait_bh))
 			err = -EIO;
 	}
-	if (unlikely(err))
+	if (unlikely(err)) {
 		page_zero_new_buffers(page, from, to);
+		ClearPageUptodate(page);
+	}
 	return err;
 }
+EXPORT_SYMBOL(__block_write_begin);
 
 static int __block_commit_write(struct inode *inode, struct page *page,
 		unsigned from, unsigned to)
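Since __block_write_begin() is now exported, a filesystem that already holds a locked page can prepare a byte range directly; the block_page_mkwrite() hunk further down does exactly this. A minimal sketch under that assumption (the myfs_ names are hypothetical; the page must be locked and backed by a mapping):

static int myfs_prepare_locked_page(struct page *page, unsigned end)
{
	int ret;

	/* Allocate blocks and bring partial blocks uptodate for [0, end). */
	ret = __block_write_begin(page, 0, end, myfs_get_block);
	if (!ret)
		/* Mark the range uptodate and dirty, as block_page_mkwrite() does. */
		ret = block_commit_write(page, 0, end);
	return ret;
}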
@@ -1949,90 +1955,32 @@ static int __block_commit_write(struct inode *inode, struct page *page,
 }
 
 /*
- * Filesystems implementing the new truncate sequence should use the
- * _newtrunc postfix variant which won't incorrectly call vmtruncate.
+ * block_write_begin takes care of the basic task of block allocation and
+ * bringing partial write blocks uptodate first.
+ *
  * The filesystem needs to handle block truncation upon failure.
  */
-int block_write_begin_newtrunc(struct file *file, struct address_space *mapping,
-			loff_t pos, unsigned len, unsigned flags,
-			struct page **pagep, void **fsdata,
-			get_block_t *get_block)
+int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
+			unsigned flags, struct page **pagep, get_block_t *get_block)
 {
-	struct inode *inode = mapping->host;
-	int status = 0;
+	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
 	struct page *page;
-	pgoff_t index;
-	unsigned start, end;
-	int ownpage = 0;
+	int status;
 
-	index = pos >> PAGE_CACHE_SHIFT;
-	start = pos & (PAGE_CACHE_SIZE - 1);
-	end = start + len;
-
-	page = *pagep;
-	if (page == NULL) {
-		ownpage = 1;
-		page = grab_cache_page_write_begin(mapping, index, flags);
-		if (!page) {
-			status = -ENOMEM;
-			goto out;
-		}
-		*pagep = page;
-	} else
-		BUG_ON(!PageLocked(page));
+	page = grab_cache_page_write_begin(mapping, index, flags);
+	if (!page)
+		return -ENOMEM;
 
-	status = __block_prepare_write(inode, page, start, end, get_block);
+	status = __block_write_begin(page, pos, len, get_block);
 	if (unlikely(status)) {
-		ClearPageUptodate(page);
-
-		if (ownpage) {
-			unlock_page(page);
-			page_cache_release(page);
-			*pagep = NULL;
-		}
+		unlock_page(page);
+		page_cache_release(page);
+		page = NULL;
 	}
 
-out:
+	*pagep = page;
 	return status;
 }
-EXPORT_SYMBOL(block_write_begin_newtrunc);
-
-/*
- * block_write_begin takes care of the basic task of block allocation and
- * bringing partial write blocks uptodate first.
- *
- * If *pagep is not NULL, then block_write_begin uses the locked page
- * at *pagep rather than allocating its own. In this case, the page will
- * not be unlocked or deallocated on failure.
- */
-int block_write_begin(struct file *file, struct address_space *mapping,
-			loff_t pos, unsigned len, unsigned flags,
-			struct page **pagep, void **fsdata,
-			get_block_t *get_block)
-{
-	int ret;
-
-	ret = block_write_begin_newtrunc(file, mapping, pos, len, flags,
-					pagep, fsdata, get_block);
-
-	/*
-	 * prepare_write() may have instantiated a few blocks
-	 * outside i_size. Trim these off again. Don't need
-	 * i_size_read because we hold i_mutex.
-	 *
-	 * Filesystems which pass down their own page also cannot
-	 * call into vmtruncate here because it would lead to lock
-	 * inversion problems (*pagep is locked). This is a further
-	 * example of where the old truncate sequence is inadequate.
-	 */
-	if (unlikely(ret) && *pagep == NULL) {
-		loff_t isize = mapping->host->i_size;
-		if (pos + len > isize)
-			vmtruncate(mapping->host, isize);
-	}
-
-	return ret;
-}
 EXPORT_SYMBOL(block_write_begin);
 
 int block_write_end(struct file *file, struct address_space *mapping,
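Converted callers lose both the vmtruncate() fallback and the file/fsdata arguments, so trimming blocks instantiated past i_size on failure moves into the filesystem, as the updated comment says. A hedged sketch of a converted ->write_begin (myfs_get_block and myfs_write_failed are hypothetical names):

static int myfs_write_begin(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned flags,
			struct page **pagep, void **fsdata)
{
	int ret;

	ret = block_write_begin(mapping, pos, len, flags, pagep,
				myfs_get_block);
	if (unlikely(ret) && pos + len > mapping->host->i_size)
		/* The helper no longer calls vmtruncate(); trim ourselves. */
		myfs_write_failed(mapping, pos + len);
	return ret;
}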
@@ -2351,7 +2299,7 @@ out:
  * For moronic filesystems that do not allow holes in file.
  * We may have to extend the file.
  */
-int cont_write_begin_newtrunc(struct file *file, struct address_space *mapping,
+int cont_write_begin(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned flags,
 			struct page **pagep, void **fsdata,
 			get_block_t *get_block, loff_t *bytes)
@@ -2363,7 +2311,7 @@ int cont_write_begin_newtrunc(struct file *file, struct address_space *mapping,
 
 	err = cont_expand_zero(file, mapping, pos, bytes);
 	if (err)
-		goto out;
+		return err;
 
 	zerofrom = *bytes & ~PAGE_CACHE_MASK;
 	if (pos+len > *bytes && zerofrom & (blocksize-1)) {
@@ -2371,44 +2319,10 @@ int cont_write_begin_newtrunc(struct file *file, struct address_space *mapping,
 		(*bytes)++;
 	}
 
-	*pagep = NULL;
-	err = block_write_begin_newtrunc(file, mapping, pos, len,
-				flags, pagep, fsdata, get_block);
-out:
-	return err;
-}
-EXPORT_SYMBOL(cont_write_begin_newtrunc);
-
-int cont_write_begin(struct file *file, struct address_space *mapping,
-			loff_t pos, unsigned len, unsigned flags,
-			struct page **pagep, void **fsdata,
-			get_block_t *get_block, loff_t *bytes)
-{
-	int ret;
-
-	ret = cont_write_begin_newtrunc(file, mapping, pos, len, flags,
-					pagep, fsdata, get_block, bytes);
-	if (unlikely(ret)) {
-		loff_t isize = mapping->host->i_size;
-		if (pos + len > isize)
-			vmtruncate(mapping->host, isize);
-	}
-
-	return ret;
+	return block_write_begin(mapping, pos, len, flags, pagep, get_block);
 }
 EXPORT_SYMBOL(cont_write_begin);
 
-int block_prepare_write(struct page *page, unsigned from, unsigned to,
-			get_block_t *get_block)
-{
-	struct inode *inode = page->mapping->host;
-	int err = __block_prepare_write(inode, page, from, to, get_block);
-	if (err)
-		ClearPageUptodate(page);
-	return err;
-}
-EXPORT_SYMBOL(block_prepare_write);
-
 int block_commit_write(struct page *page, unsigned from, unsigned to)
 {
 	struct inode *inode = page->mapping->host;
@@ -2457,7 +2371,7 @@ block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
 	else
 		end = PAGE_CACHE_SIZE;
 
-	ret = block_prepare_write(page, 0, end, get_block);
+	ret = __block_write_begin(page, 0, end, get_block);
 	if (!ret)
 		ret = block_commit_write(page, 0, end);
 
@@ -2510,11 +2424,11 @@ static void attach_nobh_buffers(struct page *page, struct buffer_head *head)
 }
 
 /*
- * Filesystems implementing the new truncate sequence should use the
- * _newtrunc postfix variant which won't incorrectly call vmtruncate.
+ * On entry, the page is fully not uptodate.
+ * On exit the page is fully uptodate in the areas outside (from,to)
  * The filesystem needs to handle block truncation upon failure.
  */
-int nobh_write_begin_newtrunc(struct file *file, struct address_space *mapping,
+int nobh_write_begin(struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned flags,
 			struct page **pagep, void **fsdata,
 			get_block_t *get_block)
@@ -2544,11 +2458,10 @@ int nobh_write_begin_newtrunc(struct file *file, struct address_space *mapping,
 	*fsdata = NULL;
 
 	if (page_has_buffers(page)) {
-		unlock_page(page);
-		page_cache_release(page);
-		*pagep = NULL;
-		return block_write_begin_newtrunc(file, mapping, pos, len,
-					flags, pagep, fsdata, get_block);
+		ret = __block_write_begin(page, pos, len, get_block);
+		if (unlikely(ret))
+			goto out_release;
+		return ret;
 	}
 
 	if (PageMappedToDisk(page))
@@ -2654,35 +2567,6 @@ out_release:
 
 	return ret;
 }
-EXPORT_SYMBOL(nobh_write_begin_newtrunc);
-
-/*
- * On entry, the page is fully not uptodate.
- * On exit the page is fully uptodate in the areas outside (from,to)
- */
-int nobh_write_begin(struct file *file, struct address_space *mapping,
-			loff_t pos, unsigned len, unsigned flags,
-			struct page **pagep, void **fsdata,
-			get_block_t *get_block)
-{
-	int ret;
-
-	ret = nobh_write_begin_newtrunc(file, mapping, pos, len, flags,
-					pagep, fsdata, get_block);
-
-	/*
-	 * prepare_write() may have instantiated a few blocks
-	 * outside i_size. Trim these off again. Don't need
-	 * i_size_read because we hold i_mutex.
-	 */
-	if (unlikely(ret)) {
-		loff_t isize = mapping->host->i_size;
-		if (pos + len > isize)
-			vmtruncate(mapping->host, isize);
-	}
-
-	return ret;
-}
 EXPORT_SYMBOL(nobh_write_begin);
 
 int nobh_write_end(struct file *file, struct address_space *mapping,
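The nobh path follows the same conversion: the _newtrunc body becomes nobh_write_begin() itself, minus the struct file argument, and failure truncation is again the caller's job. A sketch of a converted caller (hypothetical myfs_ names, same pattern as the block_write_begin sketch above):

static int myfs_nobh_write_begin(struct file *file,
			struct address_space *mapping, loff_t pos,
			unsigned len, unsigned flags,
			struct page **pagep, void **fsdata)
{
	int ret;

	ret = nobh_write_begin(mapping, pos, len, flags, pagep, fsdata,
				myfs_get_block);
	if (unlikely(ret) && pos + len > mapping->host->i_size)
		/* Assumed helper: trim blocks instantiated past i_size. */
		myfs_write_failed(mapping, pos + len);
	return ret;
}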
@@ -2998,7 +2882,6 @@ static void end_bio_bh_io_sync(struct bio *bio, int err)
 
 	if (err == -EOPNOTSUPP) {
 		set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
-		set_bit(BH_Eopnotsupp, &bh->b_state);
 	}
 
 	if (unlikely (test_bit(BIO_QUIET,&bio->bi_flags)))
@@ -3020,13 +2903,6 @@ int submit_bh(int rw, struct buffer_head * bh)
 	BUG_ON(buffer_unwritten(bh));
 
 	/*
-	 * Mask in barrier bit for a write (could be either a WRITE or a
-	 * WRITE_SYNC
-	 */
-	if (buffer_ordered(bh) && (rw & WRITE))
-		rw |= WRITE_BARRIER;
-
-	/*
 	 * Only clear out a write error when rewriting
 	 */
 	if (test_set_buffer_req(bh) && (rw & WRITE))
@@ -3064,22 +2940,21 @@ EXPORT_SYMBOL(submit_bh);
 
 /**
  * ll_rw_block: low-level access to block devices (DEPRECATED)
- * @rw: whether to %READ or %WRITE or %SWRITE or maybe %READA (readahead)
+ * @rw: whether to %READ or %WRITE or maybe %READA (readahead)
  * @nr: number of &struct buffer_heads in the array
  * @bhs: array of pointers to &struct buffer_head
  *
  * ll_rw_block() takes an array of pointers to &struct buffer_heads, and
  * requests an I/O operation on them, either a %READ or a %WRITE. The third
- * %SWRITE is like %WRITE only we make sure that the *current* data in buffers
- * are sent to disk. The fourth %READA option is described in the documentation
- * for generic_make_request() which ll_rw_block() calls.
+ * %READA option is described in the documentation for generic_make_request()
+ * which ll_rw_block() calls.
  *
  * This function drops any buffer that it cannot get a lock on (with the
- * BH_Lock state bit) unless SWRITE is required, any buffer that appears to be
- * clean when doing a write request, and any buffer that appears to be
- * up-to-date when doing read request. Further it marks as clean buffers that
- * are processed for writing (the buffer cache won't assume that they are
- * actually clean until the buffer gets unlocked).
+ * BH_Lock state bit), any buffer that appears to be clean when doing a write
+ * request, and any buffer that appears to be up-to-date when doing read
+ * request. Further it marks as clean buffers that are processed for
+ * writing (the buffer cache won't assume that they are actually clean
+ * until the buffer gets unlocked).
  *
  * ll_rw_block sets b_end_io to simple completion handler that marks
  * the buffer up-to-date (if approriate), unlocks the buffer and wakes
@@ -3095,20 +2970,13 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[])
 	for (i = 0; i < nr; i++) {
 		struct buffer_head *bh = bhs[i];
 
-		if (rw == SWRITE || rw == SWRITE_SYNC || rw == SWRITE_SYNC_PLUG)
-			lock_buffer(bh);
-		else if (!trylock_buffer(bh))
+		if (!trylock_buffer(bh))
 			continue;
-
-		if (rw == WRITE || rw == SWRITE || rw == SWRITE_SYNC ||
-		    rw == SWRITE_SYNC_PLUG) {
+		if (rw == WRITE) {
 			if (test_clear_buffer_dirty(bh)) {
 				bh->b_end_io = end_buffer_write_sync;
 				get_bh(bh);
-				if (rw == SWRITE_SYNC)
-					submit_bh(WRITE_SYNC, bh);
-				else
-					submit_bh(WRITE, bh);
+				submit_bh(WRITE, bh);
 				continue;
 			}
 		} else {
@@ -3124,12 +2992,25 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[])
 }
 EXPORT_SYMBOL(ll_rw_block);
 
+void write_dirty_buffer(struct buffer_head *bh, int rw)
+{
+	lock_buffer(bh);
+	if (!test_clear_buffer_dirty(bh)) {
+		unlock_buffer(bh);
+		return;
+	}
+	bh->b_end_io = end_buffer_write_sync;
+	get_bh(bh);
+	submit_bh(rw, bh);
+}
+EXPORT_SYMBOL(write_dirty_buffer);
+
 /*
  * For a data-integrity writeout, we need to wait upon any in-progress I/O
  * and then start new I/O and then wait upon it. The caller must have a ref on
  * the buffer_head.
  */
-int sync_dirty_buffer(struct buffer_head *bh)
+int __sync_dirty_buffer(struct buffer_head *bh, int rw)
 {
 	int ret = 0;
 
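write_dirty_buffer() is the replacement for the removed SWRITE* modes: it takes the buffer lock unconditionally, waiting out any in-flight I/O, so the buffer's current contents are what reach disk, which is the guarantee the fsync_buffers_list() hunk above depends on. A hedged usage sketch:

/* Where ll_rw_block(SWRITE, 1, &bh) was used before: */
write_dirty_buffer(bh, WRITE);

/* Or with sync semantics, as fsync_buffers_list() now does: */
write_dirty_buffer(bh, WRITE_SYNC_PLUG);

/* The caller keeps its own reference; completion runs
 * end_buffer_write_sync(), which drops the reference taken here. */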
@@ -3138,12 +3019,8 @@ int sync_dirty_buffer(struct buffer_head *bh)
 	if (test_clear_buffer_dirty(bh)) {
 		get_bh(bh);
 		bh->b_end_io = end_buffer_write_sync;
-		ret = submit_bh(WRITE_SYNC, bh);
+		ret = submit_bh(rw, bh);
 		wait_on_buffer(bh);
-		if (buffer_eopnotsupp(bh)) {
-			clear_buffer_eopnotsupp(bh);
-			ret = -EOPNOTSUPP;
-		}
 		if (!ret && !buffer_uptodate(bh))
 			ret = -EIO;
 	} else {
@@ -3151,6 +3028,12 @@ int sync_dirty_buffer(struct buffer_head *bh)
 	}
 	return ret;
 }
+EXPORT_SYMBOL(__sync_dirty_buffer);
+
+int sync_dirty_buffer(struct buffer_head *bh)
+{
+	return __sync_dirty_buffer(bh, WRITE_SYNC);
+}
 EXPORT_SYMBOL(sync_dirty_buffer);
 
 /*
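__sync_dirty_buffer() factors the write-and-wait logic out of sync_dirty_buffer() so callers can pick the request type themselves. A sketch of a caller that wants data-integrity semantics without the WRITE_SYNC hint (assumption: plain WRITE is acceptable for its ordering needs):

	int err;

	/* The caller must hold a reference on bh, per the comment above. */
	err = __sync_dirty_buffer(bh, WRITE);
	if (err)
		printk(KERN_ERR "buffer writeout failed: %d\n", err);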