about summary refs log tree commit diff stats
path: root/fs/buffer.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/buffer.c')
-rw-r--r--fs/buffer.c210
1 files changed, 165 insertions, 45 deletions
diff --git a/fs/buffer.c b/fs/buffer.c
index 161be58c5cb0..5715dac7821f 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -49,7 +49,7 @@
49 49
50static int fsync_buffers_list(spinlock_t *lock, struct list_head *list); 50static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
51static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh, 51static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
52 struct writeback_control *wbc); 52 enum rw_hint hint, struct writeback_control *wbc);
53 53
54#define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers) 54#define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)
55 55
@@ -178,7 +178,7 @@ void end_buffer_write_sync(struct buffer_head *bh, int uptodate)
178 set_buffer_uptodate(bh); 178 set_buffer_uptodate(bh);
179 } else { 179 } else {
180 buffer_io_error(bh, ", lost sync page write"); 180 buffer_io_error(bh, ", lost sync page write");
181 set_buffer_write_io_error(bh); 181 mark_buffer_write_io_error(bh);
182 clear_buffer_uptodate(bh); 182 clear_buffer_uptodate(bh);
183 } 183 }
184 unlock_buffer(bh); 184 unlock_buffer(bh);
@@ -352,8 +352,7 @@ void end_buffer_async_write(struct buffer_head *bh, int uptodate)
352 set_buffer_uptodate(bh); 352 set_buffer_uptodate(bh);
353 } else { 353 } else {
354 buffer_io_error(bh, ", lost async page write"); 354 buffer_io_error(bh, ", lost async page write");
355 mapping_set_error(page->mapping, -EIO); 355 mark_buffer_write_io_error(bh);
356 set_buffer_write_io_error(bh);
357 clear_buffer_uptodate(bh); 356 clear_buffer_uptodate(bh);
358 SetPageError(page); 357 SetPageError(page);
359 } 358 }
@@ -481,8 +480,6 @@ static void __remove_assoc_queue(struct buffer_head *bh)
481{ 480{
482 list_del_init(&bh->b_assoc_buffers); 481 list_del_init(&bh->b_assoc_buffers);
483 WARN_ON(!bh->b_assoc_map); 482 WARN_ON(!bh->b_assoc_map);
484 if (buffer_write_io_error(bh))
485 set_bit(AS_EIO, &bh->b_assoc_map->flags);
486 bh->b_assoc_map = NULL; 483 bh->b_assoc_map = NULL;
487} 484}
488 485
@@ -1181,6 +1178,17 @@ void mark_buffer_dirty(struct buffer_head *bh)
1181} 1178}
1182EXPORT_SYMBOL(mark_buffer_dirty); 1179EXPORT_SYMBOL(mark_buffer_dirty);
1183 1180
1181void mark_buffer_write_io_error(struct buffer_head *bh)
1182{
1183 set_buffer_write_io_error(bh);
1184 /* FIXME: do we need to set this in both places? */
1185 if (bh->b_page && bh->b_page->mapping)
1186 mapping_set_error(bh->b_page->mapping, -EIO);
1187 if (bh->b_assoc_map)
1188 mapping_set_error(bh->b_assoc_map, -EIO);
1189}
1190EXPORT_SYMBOL(mark_buffer_write_io_error);
1191
1184/* 1192/*
1185 * Decrement a buffer_head's reference count. If all buffers against a page 1193 * Decrement a buffer_head's reference count. If all buffers against a page
1186 * have zero reference count, are clean and unlocked, and if the page is clean 1194 * have zero reference count, are clean and unlocked, and if the page is clean
@@ -1273,44 +1281,31 @@ static inline void check_irqs_on(void)
1273} 1281}
1274 1282
1275/* 1283/*
1276 * The LRU management algorithm is dopey-but-simple. Sorry. 1284 * Install a buffer_head into this cpu's LRU. If not already in the LRU, it is
1285 * inserted at the front, and the buffer_head at the back if any is evicted.
1286 * Or, if already in the LRU it is moved to the front.
1277 */ 1287 */
1278static void bh_lru_install(struct buffer_head *bh) 1288static void bh_lru_install(struct buffer_head *bh)
1279{ 1289{
1280 struct buffer_head *evictee = NULL; 1290 struct buffer_head *evictee = bh;
1291 struct bh_lru *b;
1292 int i;
1281 1293
1282 check_irqs_on(); 1294 check_irqs_on();
1283 bh_lru_lock(); 1295 bh_lru_lock();
1284 if (__this_cpu_read(bh_lrus.bhs[0]) != bh) {
1285 struct buffer_head *bhs[BH_LRU_SIZE];
1286 int in;
1287 int out = 0;
1288 1296
1289 get_bh(bh); 1297 b = this_cpu_ptr(&bh_lrus);
1290 bhs[out++] = bh; 1298 for (i = 0; i < BH_LRU_SIZE; i++) {
1291 for (in = 0; in < BH_LRU_SIZE; in++) { 1299 swap(evictee, b->bhs[i]);
1292 struct buffer_head *bh2 = 1300 if (evictee == bh) {
1293 __this_cpu_read(bh_lrus.bhs[in]); 1301 bh_lru_unlock();
1294 1302 return;
1295 if (bh2 == bh) {
1296 __brelse(bh2);
1297 } else {
1298 if (out >= BH_LRU_SIZE) {
1299 BUG_ON(evictee != NULL);
1300 evictee = bh2;
1301 } else {
1302 bhs[out++] = bh2;
1303 }
1304 }
1305 } 1303 }
1306 while (out < BH_LRU_SIZE)
1307 bhs[out++] = NULL;
1308 memcpy(this_cpu_ptr(&bh_lrus.bhs), bhs, sizeof(bhs));
1309 } 1304 }
1310 bh_lru_unlock();
1311 1305
1312 if (evictee) 1306 get_bh(bh);
1313 __brelse(evictee); 1307 bh_lru_unlock();
1308 brelse(evictee);
1314} 1309}
1315 1310
1316/* 1311/*
@@ -1829,7 +1824,8 @@ int __block_write_full_page(struct inode *inode, struct page *page,
1829 do { 1824 do {
1830 struct buffer_head *next = bh->b_this_page; 1825 struct buffer_head *next = bh->b_this_page;
1831 if (buffer_async_write(bh)) { 1826 if (buffer_async_write(bh)) {
1832 submit_bh_wbc(REQ_OP_WRITE, write_flags, bh, wbc); 1827 submit_bh_wbc(REQ_OP_WRITE, write_flags, bh,
1828 inode->i_write_hint, wbc);
1833 nr_underway++; 1829 nr_underway++;
1834 } 1830 }
1835 bh = next; 1831 bh = next;
@@ -1883,7 +1879,8 @@ recover:
1883 struct buffer_head *next = bh->b_this_page; 1879 struct buffer_head *next = bh->b_this_page;
1884 if (buffer_async_write(bh)) { 1880 if (buffer_async_write(bh)) {
1885 clear_buffer_dirty(bh); 1881 clear_buffer_dirty(bh);
1886 submit_bh_wbc(REQ_OP_WRITE, write_flags, bh, wbc); 1882 submit_bh_wbc(REQ_OP_WRITE, write_flags, bh,
1883 inode->i_write_hint, wbc);
1887 nr_underway++; 1884 nr_underway++;
1888 } 1885 }
1889 bh = next; 1886 bh = next;
@@ -3021,11 +3018,11 @@ EXPORT_SYMBOL(block_write_full_page);
3021sector_t generic_block_bmap(struct address_space *mapping, sector_t block, 3018sector_t generic_block_bmap(struct address_space *mapping, sector_t block,
3022 get_block_t *get_block) 3019 get_block_t *get_block)
3023{ 3020{
3024 struct buffer_head tmp;
3025 struct inode *inode = mapping->host; 3021 struct inode *inode = mapping->host;
3026 tmp.b_state = 0; 3022 struct buffer_head tmp = {
3027 tmp.b_blocknr = 0; 3023 .b_size = i_blocksize(inode),
3028 tmp.b_size = i_blocksize(inode); 3024 };
3025
3029 get_block(inode, block, &tmp, 0); 3026 get_block(inode, block, &tmp, 0);
3030 return tmp.b_blocknr; 3027 return tmp.b_blocknr;
3031} 3028}
@@ -3038,7 +3035,7 @@ static void end_bio_bh_io_sync(struct bio *bio)
3038 if (unlikely(bio_flagged(bio, BIO_QUIET))) 3035 if (unlikely(bio_flagged(bio, BIO_QUIET)))
3039 set_bit(BH_Quiet, &bh->b_state); 3036 set_bit(BH_Quiet, &bh->b_state);
3040 3037
3041 bh->b_end_io(bh, !bio->bi_error); 3038 bh->b_end_io(bh, !bio->bi_status);
3042 bio_put(bio); 3039 bio_put(bio);
3043} 3040}
3044 3041
@@ -3091,7 +3088,7 @@ void guard_bio_eod(int op, struct bio *bio)
3091} 3088}
3092 3089
3093static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh, 3090static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
3094 struct writeback_control *wbc) 3091 enum rw_hint write_hint, struct writeback_control *wbc)
3095{ 3092{
3096 struct bio *bio; 3093 struct bio *bio;
3097 3094
@@ -3120,6 +3117,7 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
3120 3117
3121 bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9); 3118 bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
3122 bio->bi_bdev = bh->b_bdev; 3119 bio->bi_bdev = bh->b_bdev;
3120 bio->bi_write_hint = write_hint;
3123 3121
3124 bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh)); 3122 bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
3125 BUG_ON(bio->bi_iter.bi_size != bh->b_size); 3123 BUG_ON(bio->bi_iter.bi_size != bh->b_size);
@@ -3142,7 +3140,7 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
3142 3140
3143int submit_bh(int op, int op_flags, struct buffer_head *bh) 3141int submit_bh(int op, int op_flags, struct buffer_head *bh)
3144{ 3142{
3145 return submit_bh_wbc(op, op_flags, bh, NULL); 3143 return submit_bh_wbc(op, op_flags, bh, 0, NULL);
3146} 3144}
3147EXPORT_SYMBOL(submit_bh); 3145EXPORT_SYMBOL(submit_bh);
3148 3146
@@ -3279,8 +3277,6 @@ drop_buffers(struct page *page, struct buffer_head **buffers_to_free)
3279 3277
3280 bh = head; 3278 bh = head;
3281 do { 3279 do {
3282 if (buffer_write_io_error(bh) && page->mapping)
3283 mapping_set_error(page->mapping, -EIO);
3284 if (buffer_busy(bh)) 3280 if (buffer_busy(bh))
3285 goto failed; 3281 goto failed;
3286 bh = bh->b_this_page; 3282 bh = bh->b_this_page;
@@ -3492,6 +3488,130 @@ int bh_submit_read(struct buffer_head *bh)
3492} 3488}
3493EXPORT_SYMBOL(bh_submit_read); 3489EXPORT_SYMBOL(bh_submit_read);
3494 3490
3491/*
3492 * Seek for SEEK_DATA / SEEK_HOLE within @page, starting at @lastoff.
3493 *
3494 * Returns the offset within the file on success, and -ENOENT otherwise.
3495 */
3496static loff_t
3497page_seek_hole_data(struct page *page, loff_t lastoff, int whence)
3498{
3499 loff_t offset = page_offset(page);
3500 struct buffer_head *bh, *head;
3501 bool seek_data = whence == SEEK_DATA;
3502
3503 if (lastoff < offset)
3504 lastoff = offset;
3505
3506 bh = head = page_buffers(page);
3507 do {
3508 offset += bh->b_size;
3509 if (lastoff >= offset)
3510 continue;
3511
3512 /*
3513 * Unwritten extents that have data in the page cache covering
3514 * them can be identified by the BH_Unwritten state flag.
3515 * Pages with multiple buffers might have a mix of holes, data
3516 * and unwritten extents - any buffer with valid data in it
3517 * should have BH_Uptodate flag set on it.
3518 */
3519
3520 if ((buffer_unwritten(bh) || buffer_uptodate(bh)) == seek_data)
3521 return lastoff;
3522
3523 lastoff = offset;
3524 } while ((bh = bh->b_this_page) != head);
3525 return -ENOENT;
3526}
3527
3528/*
3529 * Seek for SEEK_DATA / SEEK_HOLE in the page cache.
3530 *
3531 * Within unwritten extents, the page cache determines which parts are holes
3532 * and which are data: unwritten and uptodate buffer heads count as data;
3533 * everything else counts as a hole.
3534 *
 3535 * Returns the resulting offset on success, and -ENOENT otherwise.
3536 */
3537loff_t
3538page_cache_seek_hole_data(struct inode *inode, loff_t offset, loff_t length,
3539 int whence)
3540{
3541 pgoff_t index = offset >> PAGE_SHIFT;
3542 pgoff_t end = DIV_ROUND_UP(offset + length, PAGE_SIZE);
3543 loff_t lastoff = offset;
3544 struct pagevec pvec;
3545
3546 if (length <= 0)
3547 return -ENOENT;
3548
3549 pagevec_init(&pvec, 0);
3550
3551 do {
3552 unsigned want, nr_pages, i;
3553
3554 want = min_t(unsigned, end - index, PAGEVEC_SIZE);
3555 nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index, want);
3556 if (nr_pages == 0)
3557 break;
3558
3559 for (i = 0; i < nr_pages; i++) {
3560 struct page *page = pvec.pages[i];
3561
3562 /*
3563 * At this point, the page may be truncated or
3564 * invalidated (changing page->mapping to NULL), or
3565 * even swizzled back from swapper_space to tmpfs file
3566 * mapping. However, page->index will not change
3567 * because we have a reference on the page.
3568 *
3569 * If current page offset is beyond where we've ended,
3570 * we've found a hole.
3571 */
3572 if (whence == SEEK_HOLE &&
3573 lastoff < page_offset(page))
3574 goto check_range;
3575
3576 /* Searching done if the page index is out of range. */
3577 if (page->index >= end)
3578 goto not_found;
3579
3580 lock_page(page);
3581 if (likely(page->mapping == inode->i_mapping) &&
3582 page_has_buffers(page)) {
3583 lastoff = page_seek_hole_data(page, lastoff, whence);
3584 if (lastoff >= 0) {
3585 unlock_page(page);
3586 goto check_range;
3587 }
3588 }
3589 unlock_page(page);
3590 lastoff = page_offset(page) + PAGE_SIZE;
3591 }
3592
3593 /* Searching done if fewer pages returned than wanted. */
3594 if (nr_pages < want)
3595 break;
3596
3597 index = pvec.pages[i - 1]->index + 1;
3598 pagevec_release(&pvec);
3599 } while (index < end);
3600
3601 /* When no page at lastoff and we are not done, we found a hole. */
3602 if (whence != SEEK_HOLE)
3603 goto not_found;
3604
3605check_range:
3606 if (lastoff < offset + length)
3607 goto out;
3608not_found:
3609 lastoff = -ENOENT;
3610out:
3611 pagevec_release(&pvec);
3612 return lastoff;
3613}
3614
3495void __init buffer_init(void) 3615void __init buffer_init(void)
3496{ 3616{
3497 unsigned long nrpages; 3617 unsigned long nrpages;