Diffstat (limited to 'fs/buffer.c')
-rw-r--r-- fs/buffer.c | 249
1 file changed, 73 insertions(+), 176 deletions(-)
diff --git a/fs/buffer.c b/fs/buffer.c
index d54812b198e9..3e7dca279d1c 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -770,11 +770,12 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
 				spin_unlock(lock);
 				/*
 				 * Ensure any pending I/O completes so that
-				 * ll_rw_block() actually writes the current
-				 * contents - it is a noop if I/O is still in
-				 * flight on potentially older contents.
+				 * write_dirty_buffer() actually writes the
+				 * current contents - it is a noop if I/O is
+				 * still in flight on potentially older
+				 * contents.
 				 */
-				ll_rw_block(SWRITE_SYNC_PLUG, 1, &bh);
+				write_dirty_buffer(bh, WRITE_SYNC_PLUG);
 
 				/*
 				 * Kick off IO for the previous mapping. Note
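Unlike ll_rw_block(), which only trylocks and silently skips buffers it cannot lock, write_dirty_buffer() (added near the end of this patch) sleeps on the buffer lock, so the write-the-current-contents guarantee that SWRITE provided is preserved. A minimal sketch of the substitution, with bh standing in for any dirty, referenced buffer_head:

	/* old: lock (sleeping) and write the buffer's current contents */
	ll_rw_block(SWRITE_SYNC_PLUG, 1, &bh);
	/* new: same semantics, spelled explicitly */
	write_dirty_buffer(bh, WRITE_SYNC_PLUG);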
@@ -1833,9 +1834,10 @@ void page_zero_new_buffers(struct page *page, unsigned from, unsigned to)
 }
 EXPORT_SYMBOL(page_zero_new_buffers);
 
-static int __block_prepare_write(struct inode *inode, struct page *page,
-		unsigned from, unsigned to, get_block_t *get_block)
+int block_prepare_write(struct page *page, unsigned from, unsigned to,
+		get_block_t *get_block)
 {
+	struct inode *inode = page->mapping->host;
 	unsigned block_start, block_end;
 	sector_t block;
 	int err = 0;
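block_prepare_write() now derives the inode from the page itself, so the old inode argument drops out of the exported signature. A hedged sketch of a direct caller, where foo_get_block is a hypothetical get_block_t implementation:

	/* page must be locked and belong to a mapping, since the inode is
	 * taken from page->mapping->host */
	static int foo_prepare(struct page *page, unsigned from, unsigned to)
	{
		return block_prepare_write(page, from, to, foo_get_block);
	}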
@@ -1908,10 +1910,13 @@ static int __block_prepare_write(struct inode *inode, struct page *page,
 		if (!buffer_uptodate(*wait_bh))
 			err = -EIO;
 	}
-	if (unlikely(err))
+	if (unlikely(err)) {
 		page_zero_new_buffers(page, from, to);
+		ClearPageUptodate(page);
+	}
 	return err;
 }
+EXPORT_SYMBOL(block_prepare_write);
 
 static int __block_commit_write(struct inode *inode, struct page *page,
 		unsigned from, unsigned to)
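With ClearPageUptodate() folded into the failure path, the exported function is now just the old static helper; the thin wrapper that existed only to add that one call (removed further down in this patch) was:

	int block_prepare_write(struct page *page, unsigned from, unsigned to,
			get_block_t *get_block)
	{
		struct inode *inode = page->mapping->host;
		int err = __block_prepare_write(inode, page, from, to, get_block);
		if (err)
			ClearPageUptodate(page);
		return err;
	}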
@@ -1948,90 +1953,41 @@ static int __block_commit_write(struct inode *inode, struct page *page,
 	return 0;
 }
 
-/*
- * Filesystems implementing the new truncate sequence should use the
- * _newtrunc postfix variant which won't incorrectly call vmtruncate.
- * The filesystem needs to handle block truncation upon failure.
- */
-int block_write_begin_newtrunc(struct file *file, struct address_space *mapping,
-			loff_t pos, unsigned len, unsigned flags,
-			struct page **pagep, void **fsdata,
-			get_block_t *get_block)
+int __block_write_begin(struct page *page, loff_t pos, unsigned len,
+		get_block_t *get_block)
 {
-	struct inode *inode = mapping->host;
-	int status = 0;
-	struct page *page;
-	pgoff_t index;
-	unsigned start, end;
-	int ownpage = 0;
+	unsigned start = pos & (PAGE_CACHE_SIZE - 1);
 
-	index = pos >> PAGE_CACHE_SHIFT;
-	start = pos & (PAGE_CACHE_SIZE - 1);
-	end = start + len;
-
-	page = *pagep;
-	if (page == NULL) {
-		ownpage = 1;
-		page = grab_cache_page_write_begin(mapping, index, flags);
-		if (!page) {
-			status = -ENOMEM;
-			goto out;
-		}
-		*pagep = page;
-	} else
-		BUG_ON(!PageLocked(page));
-
-	status = __block_prepare_write(inode, page, start, end, get_block);
-	if (unlikely(status)) {
-		ClearPageUptodate(page);
-
-		if (ownpage) {
-			unlock_page(page);
-			page_cache_release(page);
-			*pagep = NULL;
-		}
-	}
-
-out:
-	return status;
+	return block_prepare_write(page, start, start + len, get_block);
 }
-EXPORT_SYMBOL(block_write_begin_newtrunc);
+EXPORT_SYMBOL(__block_write_begin);
 
 /*
  * block_write_begin takes care of the basic task of block allocation and
  * bringing partial write blocks uptodate first.
  *
- * If *pagep is not NULL, then block_write_begin uses the locked page
- * at *pagep rather than allocating its own. In this case, the page will
- * not be unlocked or deallocated on failure.
+ * The filesystem needs to handle block truncation upon failure.
  */
-int block_write_begin(struct file *file, struct address_space *mapping,
-			loff_t pos, unsigned len, unsigned flags,
-			struct page **pagep, void **fsdata,
-			get_block_t *get_block)
+int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
+		unsigned flags, struct page **pagep, get_block_t *get_block)
 {
-	int ret;
+	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+	struct page *page;
+	int status;
 
-	ret = block_write_begin_newtrunc(file, mapping, pos, len, flags,
-					pagep, fsdata, get_block);
+	page = grab_cache_page_write_begin(mapping, index, flags);
+	if (!page)
+		return -ENOMEM;
 
-	/*
-	 * prepare_write() may have instantiated a few blocks
-	 * outside i_size. Trim these off again. Don't need
-	 * i_size_read because we hold i_mutex.
-	 *
-	 * Filesystems which pass down their own page also cannot
-	 * call into vmtruncate here because it would lead to lock
-	 * inversion problems (*pagep is locked). This is a further
-	 * example of where the old truncate sequence is inadequate.
-	 */
-	if (unlikely(ret) && *pagep == NULL) {
-		loff_t isize = mapping->host->i_size;
-		if (pos + len > isize)
-			vmtruncate(mapping->host, isize);
-	}
+	status = __block_write_begin(page, pos, len, get_block);
+	if (unlikely(status)) {
+		unlock_page(page);
+		page_cache_release(page);
+		page = NULL;
+	}
 
-	return ret;
+	*pagep = page;
+	return status;
 }
 EXPORT_SYMBOL(block_write_begin);
 
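Since block_write_begin() no longer calls vmtruncate(), a filesystem converting to it must trim any blocks instantiated past i_size in its own ->write_begin. A hedged sketch of the new calling convention, where foo_get_block and foo_truncate_blocks are hypothetical stand-ins for a filesystem's helpers:

	static int foo_write_begin(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned flags,
			struct page **pagep, void **fsdata)
	{
		int ret;

		ret = block_write_begin(mapping, pos, len, flags, pagep,
					foo_get_block);
		if (unlikely(ret)) {
			loff_t isize = mapping->host->i_size;
			/* blocks may have been allocated beyond i_size; the
			 * filesystem, not buffer.c, trims them now */
			if (pos + len > isize)
				foo_truncate_blocks(mapping->host, isize);
		}
		return ret;
	}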
@@ -2351,7 +2307,7 @@ out:
  * For moronic filesystems that do not allow holes in file.
  * We may have to extend the file.
  */
-int cont_write_begin_newtrunc(struct file *file, struct address_space *mapping,
+int cont_write_begin(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned flags,
 			struct page **pagep, void **fsdata,
 			get_block_t *get_block, loff_t *bytes)
@@ -2363,7 +2319,7 @@ int cont_write_begin_newtrunc(struct file *file, struct address_space *mapping,
 
 	err = cont_expand_zero(file, mapping, pos, bytes);
 	if (err)
-		goto out;
+		return err;
 
 	zerofrom = *bytes & ~PAGE_CACHE_MASK;
 	if (pos+len > *bytes && zerofrom & (blocksize-1)) {
@@ -2371,44 +2327,10 @@ int cont_write_begin_newtrunc(struct file *file, struct address_space *mapping,
 		(*bytes)++;
 	}
 
-	*pagep = NULL;
-	err = block_write_begin_newtrunc(file, mapping, pos, len,
-				flags, pagep, fsdata, get_block);
-out:
-	return err;
-}
-EXPORT_SYMBOL(cont_write_begin_newtrunc);
-
-int cont_write_begin(struct file *file, struct address_space *mapping,
-			loff_t pos, unsigned len, unsigned flags,
-			struct page **pagep, void **fsdata,
-			get_block_t *get_block, loff_t *bytes)
-{
-	int ret;
-
-	ret = cont_write_begin_newtrunc(file, mapping, pos, len, flags,
-					pagep, fsdata, get_block, bytes);
-	if (unlikely(ret)) {
-		loff_t isize = mapping->host->i_size;
-		if (pos + len > isize)
-			vmtruncate(mapping->host, isize);
-	}
-
-	return ret;
+	return block_write_begin(mapping, pos, len, flags, pagep, get_block);
 }
 EXPORT_SYMBOL(cont_write_begin);
 
-int block_prepare_write(struct page *page, unsigned from, unsigned to,
-			get_block_t *get_block)
-{
-	struct inode *inode = page->mapping->host;
-	int err = __block_prepare_write(inode, page, from, to, get_block);
-	if (err)
-		ClearPageUptodate(page);
-	return err;
-}
-EXPORT_SYMBOL(block_prepare_write);
-
 int block_commit_write(struct page *page, unsigned from, unsigned to)
 {
 	struct inode *inode = page->mapping->host;
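cont_write_begin() keeps its old signature for its users but now delegates straight to block_write_begin(); since that helper always allocates the page itself, the *pagep = NULL priming and the out: label become dead weight. As with block_write_begin, error-path truncation is the caller's job. A sketch, with foo_get_block and foo_truncate_blocks hypothetical and zeroed_upto standing in for the filesystem's zero-filled watermark:

	loff_t zeroed_upto = ...;	/* hypothetical per-inode watermark */
	int err;

	err = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
			       foo_get_block, &zeroed_upto);
	if (unlikely(err) && pos + len > mapping->host->i_size)
		foo_truncate_blocks(mapping->host, mapping->host->i_size);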
@@ -2510,11 +2432,11 @@ static void attach_nobh_buffers(struct page *page, struct buffer_head *head)
 }
 
 /*
- * Filesystems implementing the new truncate sequence should use the
- * _newtrunc postfix variant which won't incorrectly call vmtruncate.
+ * On entry, the page is fully not uptodate.
+ * On exit the page is fully uptodate in the areas outside (from,to)
  * The filesystem needs to handle block truncation upon failure.
  */
-int nobh_write_begin_newtrunc(struct file *file, struct address_space *mapping,
+int nobh_write_begin(struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned flags,
 			struct page **pagep, void **fsdata,
 			get_block_t *get_block)
@@ -2547,8 +2469,8 @@ int nobh_write_begin_newtrunc(struct file *file, struct address_space *mapping,
 		unlock_page(page);
 		page_cache_release(page);
 		*pagep = NULL;
-		return block_write_begin_newtrunc(file, mapping, pos, len,
-					flags, pagep, fsdata, get_block);
+		return block_write_begin(mapping, pos, len, flags, pagep,
+					 get_block);
 	}
 
 	if (PageMappedToDisk(page))
@@ -2654,35 +2576,6 @@ out_release:
 
 	return ret;
 }
-EXPORT_SYMBOL(nobh_write_begin_newtrunc);
-
-/*
- * On entry, the page is fully not uptodate.
- * On exit the page is fully uptodate in the areas outside (from,to)
- */
-int nobh_write_begin(struct file *file, struct address_space *mapping,
-			loff_t pos, unsigned len, unsigned flags,
-			struct page **pagep, void **fsdata,
-			get_block_t *get_block)
-{
-	int ret;
-
-	ret = nobh_write_begin_newtrunc(file, mapping, pos, len, flags,
-					pagep, fsdata, get_block);
-
-	/*
-	 * prepare_write() may have instantiated a few blocks
-	 * outside i_size. Trim these off again. Don't need
-	 * i_size_read because we hold i_mutex.
-	 */
-	if (unlikely(ret)) {
-		loff_t isize = mapping->host->i_size;
-		if (pos + len > isize)
-			vmtruncate(mapping->host, isize);
-	}
-
-	return ret;
-}
 EXPORT_SYMBOL(nobh_write_begin);
 
 int nobh_write_end(struct file *file, struct address_space *mapping,
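nobh_write_begin() absorbs the _newtrunc body wholesale; only the name and the dropped file argument change at call sites, and error-path truncation again moves to the filesystem. A sketch of an adopting ->write_begin, with foo_get_block and foo_write_failed hypothetical:

	static int foo_write_begin(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned flags,
			struct page **pagep, void **fsdata)
	{
		int ret;

		ret = nobh_write_begin(mapping, pos, len, flags, pagep, fsdata,
				       foo_get_block);
		if (unlikely(ret))
			foo_write_failed(mapping, pos + len);	/* trim past i_size */
		return ret;
	}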
@@ -3020,13 +2913,6 @@ int submit_bh(int rw, struct buffer_head * bh)
 	BUG_ON(buffer_unwritten(bh));
 
 	/*
-	 * Mask in barrier bit for a write (could be either a WRITE or a
-	 * WRITE_SYNC
-	 */
-	if (buffer_ordered(bh) && (rw & WRITE))
-		rw |= WRITE_BARRIER;
-
-	/*
 	 * Only clear out a write error when rewriting
 	 */
 	if (test_set_buffer_req(bh) && (rw & WRITE))
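submit_bh() no longer promotes writes to barriers behind the caller's back based on the BH_Ordered flag; a caller that needs ordering presumably passes it in the request type itself, along the lines of:

	/* hedged: the barrier request flag is now the caller's responsibility */
	submit_bh(WRITE_BARRIER, bh);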
@@ -3064,22 +2950,21 @@ EXPORT_SYMBOL(submit_bh);
 
 /**
  * ll_rw_block: low-level access to block devices (DEPRECATED)
- * @rw: whether to %READ or %WRITE or %SWRITE or maybe %READA (readahead)
+ * @rw: whether to %READ or %WRITE or maybe %READA (readahead)
  * @nr: number of &struct buffer_heads in the array
  * @bhs: array of pointers to &struct buffer_head
  *
  * ll_rw_block() takes an array of pointers to &struct buffer_heads, and
  * requests an I/O operation on them, either a %READ or a %WRITE. The third
- * %SWRITE is like %WRITE only we make sure that the *current* data in buffers
- * are sent to disk. The fourth %READA option is described in the documentation
- * for generic_make_request() which ll_rw_block() calls.
+ * %READA option is described in the documentation for generic_make_request()
+ * which ll_rw_block() calls.
  *
  * This function drops any buffer that it cannot get a lock on (with the
- * BH_Lock state bit) unless SWRITE is required, any buffer that appears to be
- * clean when doing a write request, and any buffer that appears to be
- * up-to-date when doing read request. Further it marks as clean buffers that
- * are processed for writing (the buffer cache won't assume that they are
- * actually clean until the buffer gets unlocked).
+ * BH_Lock state bit), any buffer that appears to be clean when doing a write
+ * request, and any buffer that appears to be up-to-date when doing read
+ * request. Further it marks as clean buffers that are processed for
+ * writing (the buffer cache won't assume that they are actually clean
+ * until the buffer gets unlocked).
 *
 * ll_rw_block sets b_end_io to simple completion handler that marks
 * the buffer up-to-date (if approriate), unlocks the buffer and wakes
@@ -3095,20 +2980,13 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[])
 	for (i = 0; i < nr; i++) {
 		struct buffer_head *bh = bhs[i];
 
-		if (rw == SWRITE || rw == SWRITE_SYNC || rw == SWRITE_SYNC_PLUG)
-			lock_buffer(bh);
-		else if (!trylock_buffer(bh))
+		if (!trylock_buffer(bh))
 			continue;
-
-		if (rw == WRITE || rw == SWRITE || rw == SWRITE_SYNC ||
-		    rw == SWRITE_SYNC_PLUG) {
+		if (rw == WRITE) {
 			if (test_clear_buffer_dirty(bh)) {
 				bh->b_end_io = end_buffer_write_sync;
 				get_bh(bh);
-				if (rw == SWRITE_SYNC)
-					submit_bh(WRITE_SYNC, bh);
-				else
-					submit_bh(WRITE, bh);
+				submit_bh(WRITE, bh);
 				continue;
 			}
 		} else {
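What remains of ll_rw_block() is strictly best-effort: locked buffers are skipped, as are clean buffers on WRITE and uptodate buffers on READ. The surviving idiom is batched readahead, for example:

	/* sketch: kick off reads on a pair of buffers, then wait only for
	 * the one we actually need; bh and bh2 are assumed valid */
	struct buffer_head *bhs[2] = { bh, bh2 };

	ll_rw_block(READ, 2, bhs);
	wait_on_buffer(bh);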
@@ -3124,12 +3002,25 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[])
 }
 EXPORT_SYMBOL(ll_rw_block);
 
+void write_dirty_buffer(struct buffer_head *bh, int rw)
+{
+	lock_buffer(bh);
+	if (!test_clear_buffer_dirty(bh)) {
+		unlock_buffer(bh);
+		return;
+	}
+	bh->b_end_io = end_buffer_write_sync;
+	get_bh(bh);
+	submit_bh(rw, bh);
+}
+EXPORT_SYMBOL(write_dirty_buffer);
+
 /*
  * For a data-integrity writeout, we need to wait upon any in-progress I/O
  * and then start new I/O and then wait upon it. The caller must have a ref on
 * the buffer_head.
 */
-int sync_dirty_buffer(struct buffer_head *bh)
+int __sync_dirty_buffer(struct buffer_head *bh, int rw)
 {
 	int ret = 0;
 
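write_dirty_buffer() is the one-buffer, sleeping replacement for the SWRITE* modes: it takes the buffer lock unconditionally, and if the buffer turns out to be clean it simply unlocks and returns. The caller picks the request type:

	write_dirty_buffer(bh, WRITE);		/* fire-and-forget writeback */
	write_dirty_buffer(bh, WRITE_SYNC_PLUG);	/* as in fsync_buffers_list() above */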
@@ -3138,7 +3029,7 @@ int sync_dirty_buffer(struct buffer_head *bh)
 	if (test_clear_buffer_dirty(bh)) {
 		get_bh(bh);
 		bh->b_end_io = end_buffer_write_sync;
-		ret = submit_bh(WRITE_SYNC, bh);
+		ret = submit_bh(rw, bh);
 		wait_on_buffer(bh);
 		if (buffer_eopnotsupp(bh)) {
 			clear_buffer_eopnotsupp(bh);
@@ -3151,6 +3042,12 @@ int sync_dirty_buffer(struct buffer_head *bh)
 	}
 	return ret;
 }
+EXPORT_SYMBOL(__sync_dirty_buffer);
+
+int sync_dirty_buffer(struct buffer_head *bh)
+{
+	return __sync_dirty_buffer(bh, WRITE_SYNC);
+}
 EXPORT_SYMBOL(sync_dirty_buffer);
 
 /*
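The split keeps the wait-and-EOPNOTSUPP handling in one place while letting callers choose the request type: sync_dirty_buffer() retains its WRITE_SYNC behaviour, and, as a hedged example, a journaling caller that wants to stay plugged might use:

	err = __sync_dirty_buffer(bh, WRITE_SYNC_PLUG);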