diff options
Diffstat (limited to 'fs/buffer.c')
-rw-r--r-- | fs/buffer.c | 210 |
1 files changed, 165 insertions, 45 deletions
diff --git a/fs/buffer.c b/fs/buffer.c index 161be58c5cb0..5715dac7821f 100644 --- a/fs/buffer.c +++ b/fs/buffer.c | |||
@@ -49,7 +49,7 @@ | |||
49 | 49 | ||
50 | static int fsync_buffers_list(spinlock_t *lock, struct list_head *list); | 50 | static int fsync_buffers_list(spinlock_t *lock, struct list_head *list); |
51 | static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh, | 51 | static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh, |
52 | struct writeback_control *wbc); | 52 | enum rw_hint hint, struct writeback_control *wbc); |
53 | 53 | ||
54 | #define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers) | 54 | #define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers) |
55 | 55 | ||
@@ -178,7 +178,7 @@ void end_buffer_write_sync(struct buffer_head *bh, int uptodate) | |||
178 | set_buffer_uptodate(bh); | 178 | set_buffer_uptodate(bh); |
179 | } else { | 179 | } else { |
180 | buffer_io_error(bh, ", lost sync page write"); | 180 | buffer_io_error(bh, ", lost sync page write"); |
181 | set_buffer_write_io_error(bh); | 181 | mark_buffer_write_io_error(bh); |
182 | clear_buffer_uptodate(bh); | 182 | clear_buffer_uptodate(bh); |
183 | } | 183 | } |
184 | unlock_buffer(bh); | 184 | unlock_buffer(bh); |
@@ -352,8 +352,7 @@ void end_buffer_async_write(struct buffer_head *bh, int uptodate) | |||
352 | set_buffer_uptodate(bh); | 352 | set_buffer_uptodate(bh); |
353 | } else { | 353 | } else { |
354 | buffer_io_error(bh, ", lost async page write"); | 354 | buffer_io_error(bh, ", lost async page write"); |
355 | mapping_set_error(page->mapping, -EIO); | 355 | mark_buffer_write_io_error(bh); |
356 | set_buffer_write_io_error(bh); | ||
357 | clear_buffer_uptodate(bh); | 356 | clear_buffer_uptodate(bh); |
358 | SetPageError(page); | 357 | SetPageError(page); |
359 | } | 358 | } |
@@ -481,8 +480,6 @@ static void __remove_assoc_queue(struct buffer_head *bh) | |||
481 | { | 480 | { |
482 | list_del_init(&bh->b_assoc_buffers); | 481 | list_del_init(&bh->b_assoc_buffers); |
483 | WARN_ON(!bh->b_assoc_map); | 482 | WARN_ON(!bh->b_assoc_map); |
484 | if (buffer_write_io_error(bh)) | ||
485 | set_bit(AS_EIO, &bh->b_assoc_map->flags); | ||
486 | bh->b_assoc_map = NULL; | 483 | bh->b_assoc_map = NULL; |
487 | } | 484 | } |
488 | 485 | ||
@@ -1181,6 +1178,17 @@ void mark_buffer_dirty(struct buffer_head *bh) | |||
1181 | } | 1178 | } |
1182 | EXPORT_SYMBOL(mark_buffer_dirty); | 1179 | EXPORT_SYMBOL(mark_buffer_dirty); |
1183 | 1180 | ||
1181 | void mark_buffer_write_io_error(struct buffer_head *bh) | ||
1182 | { | ||
1183 | set_buffer_write_io_error(bh); | ||
1184 | /* FIXME: do we need to set this in both places? */ | ||
1185 | if (bh->b_page && bh->b_page->mapping) | ||
1186 | mapping_set_error(bh->b_page->mapping, -EIO); | ||
1187 | if (bh->b_assoc_map) | ||
1188 | mapping_set_error(bh->b_assoc_map, -EIO); | ||
1189 | } | ||
1190 | EXPORT_SYMBOL(mark_buffer_write_io_error); | ||
1191 | |||
1184 | /* | 1192 | /* |
1185 | * Decrement a buffer_head's reference count. If all buffers against a page | 1193 | * Decrement a buffer_head's reference count. If all buffers against a page |
1186 | * have zero reference count, are clean and unlocked, and if the page is clean | 1194 | * have zero reference count, are clean and unlocked, and if the page is clean |
@@ -1273,44 +1281,31 @@ static inline void check_irqs_on(void) | |||
1273 | } | 1281 | } |
1274 | 1282 | ||
1275 | /* | 1283 | /* |
1276 | * The LRU management algorithm is dopey-but-simple. Sorry. | 1284 | * Install a buffer_head into this cpu's LRU. If not already in the LRU, it is |
1285 | * inserted at the front, and the buffer_head at the back, if any, is evicted. | |
1286 | * Or, if already in the LRU it is moved to the front. | ||
1277 | */ | 1287 | */ |
1278 | static void bh_lru_install(struct buffer_head *bh) | 1288 | static void bh_lru_install(struct buffer_head *bh) |
1279 | { | 1289 | { |
1280 | struct buffer_head *evictee = NULL; | 1290 | struct buffer_head *evictee = bh; |
1291 | struct bh_lru *b; | ||
1292 | int i; | ||
1281 | 1293 | ||
1282 | check_irqs_on(); | 1294 | check_irqs_on(); |
1283 | bh_lru_lock(); | 1295 | bh_lru_lock(); |
1284 | if (__this_cpu_read(bh_lrus.bhs[0]) != bh) { | ||
1285 | struct buffer_head *bhs[BH_LRU_SIZE]; | ||
1286 | int in; | ||
1287 | int out = 0; | ||
1288 | 1296 | ||
1289 | get_bh(bh); | 1297 | b = this_cpu_ptr(&bh_lrus); |
1290 | bhs[out++] = bh; | 1298 | for (i = 0; i < BH_LRU_SIZE; i++) { |
1291 | for (in = 0; in < BH_LRU_SIZE; in++) { | 1299 | swap(evictee, b->bhs[i]); |
1292 | struct buffer_head *bh2 = | 1300 | if (evictee == bh) { |
1293 | __this_cpu_read(bh_lrus.bhs[in]); | 1301 | bh_lru_unlock(); |
1294 | 1302 | return; | |
1295 | if (bh2 == bh) { | ||
1296 | __brelse(bh2); | ||
1297 | } else { | ||
1298 | if (out >= BH_LRU_SIZE) { | ||
1299 | BUG_ON(evictee != NULL); | ||
1300 | evictee = bh2; | ||
1301 | } else { | ||
1302 | bhs[out++] = bh2; | ||
1303 | } | ||
1304 | } | ||
1305 | } | 1303 | } |
1306 | while (out < BH_LRU_SIZE) | ||
1307 | bhs[out++] = NULL; | ||
1308 | memcpy(this_cpu_ptr(&bh_lrus.bhs), bhs, sizeof(bhs)); | ||
1309 | } | 1304 | } |
1310 | bh_lru_unlock(); | ||
1311 | 1305 | ||
1312 | if (evictee) | 1306 | get_bh(bh); |
1313 | __brelse(evictee); | 1307 | bh_lru_unlock(); |
1308 | brelse(evictee); | ||
1314 | } | 1309 | } |
1315 | 1310 | ||
1316 | /* | 1311 | /* |
@@ -1829,7 +1824,8 @@ int __block_write_full_page(struct inode *inode, struct page *page, | |||
1829 | do { | 1824 | do { |
1830 | struct buffer_head *next = bh->b_this_page; | 1825 | struct buffer_head *next = bh->b_this_page; |
1831 | if (buffer_async_write(bh)) { | 1826 | if (buffer_async_write(bh)) { |
1832 | submit_bh_wbc(REQ_OP_WRITE, write_flags, bh, wbc); | 1827 | submit_bh_wbc(REQ_OP_WRITE, write_flags, bh, |
1828 | inode->i_write_hint, wbc); | ||
1833 | nr_underway++; | 1829 | nr_underway++; |
1834 | } | 1830 | } |
1835 | bh = next; | 1831 | bh = next; |
@@ -1883,7 +1879,8 @@ recover: | |||
1883 | struct buffer_head *next = bh->b_this_page; | 1879 | struct buffer_head *next = bh->b_this_page; |
1884 | if (buffer_async_write(bh)) { | 1880 | if (buffer_async_write(bh)) { |
1885 | clear_buffer_dirty(bh); | 1881 | clear_buffer_dirty(bh); |
1886 | submit_bh_wbc(REQ_OP_WRITE, write_flags, bh, wbc); | 1882 | submit_bh_wbc(REQ_OP_WRITE, write_flags, bh, |
1883 | inode->i_write_hint, wbc); | ||
1887 | nr_underway++; | 1884 | nr_underway++; |
1888 | } | 1885 | } |
1889 | bh = next; | 1886 | bh = next; |
@@ -3021,11 +3018,11 @@ EXPORT_SYMBOL(block_write_full_page); | |||
3021 | sector_t generic_block_bmap(struct address_space *mapping, sector_t block, | 3018 | sector_t generic_block_bmap(struct address_space *mapping, sector_t block, |
3022 | get_block_t *get_block) | 3019 | get_block_t *get_block) |
3023 | { | 3020 | { |
3024 | struct buffer_head tmp; | ||
3025 | struct inode *inode = mapping->host; | 3021 | struct inode *inode = mapping->host; |
3026 | tmp.b_state = 0; | 3022 | struct buffer_head tmp = { |
3027 | tmp.b_blocknr = 0; | 3023 | .b_size = i_blocksize(inode), |
3028 | tmp.b_size = i_blocksize(inode); | 3024 | }; |
3025 | |||
3029 | get_block(inode, block, &tmp, 0); | 3026 | get_block(inode, block, &tmp, 0); |
3030 | return tmp.b_blocknr; | 3027 | return tmp.b_blocknr; |
3031 | } | 3028 | } |
@@ -3038,7 +3035,7 @@ static void end_bio_bh_io_sync(struct bio *bio) | |||
3038 | if (unlikely(bio_flagged(bio, BIO_QUIET))) | 3035 | if (unlikely(bio_flagged(bio, BIO_QUIET))) |
3039 | set_bit(BH_Quiet, &bh->b_state); | 3036 | set_bit(BH_Quiet, &bh->b_state); |
3040 | 3037 | ||
3041 | bh->b_end_io(bh, !bio->bi_error); | 3038 | bh->b_end_io(bh, !bio->bi_status); |
3042 | bio_put(bio); | 3039 | bio_put(bio); |
3043 | } | 3040 | } |
3044 | 3041 | ||
@@ -3091,7 +3088,7 @@ void guard_bio_eod(int op, struct bio *bio) | |||
3091 | } | 3088 | } |
3092 | 3089 | ||
3093 | static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh, | 3090 | static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh, |
3094 | struct writeback_control *wbc) | 3091 | enum rw_hint write_hint, struct writeback_control *wbc) |
3095 | { | 3092 | { |
3096 | struct bio *bio; | 3093 | struct bio *bio; |
3097 | 3094 | ||
@@ -3120,6 +3117,7 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh, | |||
3120 | 3117 | ||
3121 | bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9); | 3118 | bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9); |
3122 | bio->bi_bdev = bh->b_bdev; | 3119 | bio->bi_bdev = bh->b_bdev; |
3120 | bio->bi_write_hint = write_hint; | ||
3123 | 3121 | ||
3124 | bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh)); | 3122 | bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh)); |
3125 | BUG_ON(bio->bi_iter.bi_size != bh->b_size); | 3123 | BUG_ON(bio->bi_iter.bi_size != bh->b_size); |
@@ -3142,7 +3140,7 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh, | |||
3142 | 3140 | ||
3143 | int submit_bh(int op, int op_flags, struct buffer_head *bh) | 3141 | int submit_bh(int op, int op_flags, struct buffer_head *bh) |
3144 | { | 3142 | { |
3145 | return submit_bh_wbc(op, op_flags, bh, NULL); | 3143 | return submit_bh_wbc(op, op_flags, bh, 0, NULL); |
3146 | } | 3144 | } |
3147 | EXPORT_SYMBOL(submit_bh); | 3145 | EXPORT_SYMBOL(submit_bh); |
3148 | 3146 | ||
@@ -3279,8 +3277,6 @@ drop_buffers(struct page *page, struct buffer_head **buffers_to_free) | |||
3279 | 3277 | ||
3280 | bh = head; | 3278 | bh = head; |
3281 | do { | 3279 | do { |
3282 | if (buffer_write_io_error(bh) && page->mapping) | ||
3283 | mapping_set_error(page->mapping, -EIO); | ||
3284 | if (buffer_busy(bh)) | 3280 | if (buffer_busy(bh)) |
3285 | goto failed; | 3281 | goto failed; |
3286 | bh = bh->b_this_page; | 3282 | bh = bh->b_this_page; |
@@ -3492,6 +3488,130 @@ int bh_submit_read(struct buffer_head *bh) | |||
3492 | } | 3488 | } |
3493 | EXPORT_SYMBOL(bh_submit_read); | 3489 | EXPORT_SYMBOL(bh_submit_read); |
3494 | 3490 | ||
3491 | /* | ||
3492 | * Seek for SEEK_DATA / SEEK_HOLE within @page, starting at @lastoff. | ||
3493 | * | ||
3494 | * Returns the offset within the file on success, and -ENOENT otherwise. | ||
3495 | */ | ||
3496 | static loff_t | ||
3497 | page_seek_hole_data(struct page *page, loff_t lastoff, int whence) | ||
3498 | { | ||
3499 | loff_t offset = page_offset(page); | ||
3500 | struct buffer_head *bh, *head; | ||
3501 | bool seek_data = whence == SEEK_DATA; | ||
3502 | |||
3503 | if (lastoff < offset) | ||
3504 | lastoff = offset; | ||
3505 | |||
3506 | bh = head = page_buffers(page); | ||
3507 | do { | ||
3508 | offset += bh->b_size; | ||
3509 | if (lastoff >= offset) | ||
3510 | continue; | ||
3511 | |||
3512 | /* | ||
3513 | * Unwritten extents that have data in the page cache covering | ||
3514 | * them can be identified by the BH_Unwritten state flag. | ||
3515 | * Pages with multiple buffers might have a mix of holes, data | ||
3516 | * and unwritten extents - any buffer with valid data in it | ||
3517 | * should have BH_Uptodate flag set on it. | ||
3518 | */ | ||
3519 | |||
3520 | if ((buffer_unwritten(bh) || buffer_uptodate(bh)) == seek_data) | ||
3521 | return lastoff; | ||
3522 | |||
3523 | lastoff = offset; | ||
3524 | } while ((bh = bh->b_this_page) != head); | ||
3525 | return -ENOENT; | ||
3526 | } | ||
3527 | |||
3528 | /* | ||
3529 | * Seek for SEEK_DATA / SEEK_HOLE in the page cache. | ||
3530 | * | ||
3531 | * Within unwritten extents, the page cache determines which parts are holes | ||
3532 | * and which are data: unwritten and uptodate buffer heads count as data; | ||
3533 | * everything else counts as a hole. | ||
3534 | * | ||
3535 | * Returns the resulting offset on success, and -ENOENT otherwise. | |
3536 | */ | ||
3537 | loff_t | ||
3538 | page_cache_seek_hole_data(struct inode *inode, loff_t offset, loff_t length, | ||
3539 | int whence) | ||
3540 | { | ||
3541 | pgoff_t index = offset >> PAGE_SHIFT; | ||
3542 | pgoff_t end = DIV_ROUND_UP(offset + length, PAGE_SIZE); | ||
3543 | loff_t lastoff = offset; | ||
3544 | struct pagevec pvec; | ||
3545 | |||
3546 | if (length <= 0) | ||
3547 | return -ENOENT; | ||
3548 | |||
3549 | pagevec_init(&pvec, 0); | ||
3550 | |||
3551 | do { | ||
3552 | unsigned want, nr_pages, i; | ||
3553 | |||
3554 | want = min_t(unsigned, end - index, PAGEVEC_SIZE); | ||
3555 | nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index, want); | ||
3556 | if (nr_pages == 0) | ||
3557 | break; | ||
3558 | |||
3559 | for (i = 0; i < nr_pages; i++) { | ||
3560 | struct page *page = pvec.pages[i]; | ||
3561 | |||
3562 | /* | ||
3563 | * At this point, the page may be truncated or | ||
3564 | * invalidated (changing page->mapping to NULL), or | ||
3565 | * even swizzled back from swapper_space to tmpfs file | ||
3566 | * mapping. However, page->index will not change | ||
3567 | * because we have a reference on the page. | ||
3568 | * | ||
3569 | * If current page offset is beyond where we've ended, | ||
3570 | * we've found a hole. | ||
3571 | */ | ||
3572 | if (whence == SEEK_HOLE && | ||
3573 | lastoff < page_offset(page)) | ||
3574 | goto check_range; | ||
3575 | |||
3576 | /* Searching done if the page index is out of range. */ | ||
3577 | if (page->index >= end) | ||
3578 | goto not_found; | ||
3579 | |||
3580 | lock_page(page); | ||
3581 | if (likely(page->mapping == inode->i_mapping) && | ||
3582 | page_has_buffers(page)) { | ||
3583 | lastoff = page_seek_hole_data(page, lastoff, whence); | ||
3584 | if (lastoff >= 0) { | ||
3585 | unlock_page(page); | ||
3586 | goto check_range; | ||
3587 | } | ||
3588 | } | ||
3589 | unlock_page(page); | ||
3590 | lastoff = page_offset(page) + PAGE_SIZE; | ||
3591 | } | ||
3592 | |||
3593 | /* Searching done if fewer pages returned than wanted. */ | ||
3594 | if (nr_pages < want) | ||
3595 | break; | ||
3596 | |||
3597 | index = pvec.pages[i - 1]->index + 1; | ||
3598 | pagevec_release(&pvec); | ||
3599 | } while (index < end); | ||
3600 | |||
3601 | /* When no page at lastoff and we are not done, we found a hole. */ | ||
3602 | if (whence != SEEK_HOLE) | ||
3603 | goto not_found; | ||
3604 | |||
3605 | check_range: | ||
3606 | if (lastoff < offset + length) | ||
3607 | goto out; | ||
3608 | not_found: | ||
3609 | lastoff = -ENOENT; | ||
3610 | out: | ||
3611 | pagevec_release(&pvec); | ||
3612 | return lastoff; | ||
3613 | } | ||
3614 | |||
3495 | void __init buffer_init(void) | 3615 | void __init buffer_init(void) |
3496 | { | 3616 | { |
3497 | unsigned long nrpages; | 3617 | unsigned long nrpages; |