Diffstat (limited to 'fs/buffer.c')
-rw-r--r--	fs/buffer.c	323
1 file changed, 137 insertions(+), 186 deletions(-)
diff --git a/fs/buffer.c b/fs/buffer.c
index 891e1c78e4f1..28f320fac4d4 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -166,151 +166,6 @@ void end_buffer_write_sync(struct buffer_head *bh, int uptodate)
 }
 
 /*
- * Write out and wait upon all the dirty data associated with a block
- * device via its mapping.  Does not take the superblock lock.
- */
-int sync_blockdev(struct block_device *bdev)
-{
-	int ret = 0;
-
-	if (bdev)
-		ret = filemap_write_and_wait(bdev->bd_inode->i_mapping);
-	return ret;
-}
-EXPORT_SYMBOL(sync_blockdev);
-
-/*
- * Write out and wait upon all dirty data associated with this
- * device.  Filesystem data as well as the underlying block
- * device.  Takes the superblock lock.
- */
-int fsync_bdev(struct block_device *bdev)
-{
-	struct super_block *sb = get_super(bdev);
-	if (sb) {
-		int res = fsync_super(sb);
-		drop_super(sb);
-		return res;
-	}
-	return sync_blockdev(bdev);
-}
-
-/**
- * freeze_bdev -- lock a filesystem and force it into a consistent state
- * @bdev:	blockdevice to lock
- *
- * This takes the block device bd_mount_sem to make sure no new mounts
- * happen on bdev until thaw_bdev() is called.
- * If a superblock is found on this device, we take the s_umount semaphore
- * on it to make sure nobody unmounts until the snapshot creation is done.
- * The reference counter (bd_fsfreeze_count) guarantees that only the last
- * unfreeze process can unfreeze the frozen filesystem actually when multiple
- * freeze requests arrive simultaneously. It counts up in freeze_bdev() and
- * count down in thaw_bdev(). When it becomes 0, thaw_bdev() will unfreeze
- * actually.
- */
-struct super_block *freeze_bdev(struct block_device *bdev)
-{
-	struct super_block *sb;
-	int error = 0;
-
-	mutex_lock(&bdev->bd_fsfreeze_mutex);
-	if (bdev->bd_fsfreeze_count > 0) {
-		bdev->bd_fsfreeze_count++;
-		sb = get_super(bdev);
-		mutex_unlock(&bdev->bd_fsfreeze_mutex);
-		return sb;
-	}
-	bdev->bd_fsfreeze_count++;
-
-	down(&bdev->bd_mount_sem);
-	sb = get_super(bdev);
-	if (sb && !(sb->s_flags & MS_RDONLY)) {
-		sb->s_frozen = SB_FREEZE_WRITE;
-		smp_wmb();
-
-		__fsync_super(sb);
-
-		sb->s_frozen = SB_FREEZE_TRANS;
-		smp_wmb();
-
-		sync_blockdev(sb->s_bdev);
-
-		if (sb->s_op->freeze_fs) {
-			error = sb->s_op->freeze_fs(sb);
-			if (error) {
-				printk(KERN_ERR
-					"VFS:Filesystem freeze failed\n");
-				sb->s_frozen = SB_UNFROZEN;
-				drop_super(sb);
-				up(&bdev->bd_mount_sem);
-				bdev->bd_fsfreeze_count--;
-				mutex_unlock(&bdev->bd_fsfreeze_mutex);
-				return ERR_PTR(error);
-			}
-		}
-	}
-
-	sync_blockdev(bdev);
-	mutex_unlock(&bdev->bd_fsfreeze_mutex);
-
-	return sb;	/* thaw_bdev releases s->s_umount and bd_mount_sem */
-}
-EXPORT_SYMBOL(freeze_bdev);
-
-/**
- * thaw_bdev -- unlock filesystem
- * @bdev:	blockdevice to unlock
- * @sb:	associated superblock
- *
- * Unlocks the filesystem and marks it writeable again after freeze_bdev().
- */
-int thaw_bdev(struct block_device *bdev, struct super_block *sb)
-{
-	int error = 0;
-
-	mutex_lock(&bdev->bd_fsfreeze_mutex);
-	if (!bdev->bd_fsfreeze_count) {
-		mutex_unlock(&bdev->bd_fsfreeze_mutex);
-		return -EINVAL;
-	}
-
-	bdev->bd_fsfreeze_count--;
-	if (bdev->bd_fsfreeze_count > 0) {
-		if (sb)
-			drop_super(sb);
-		mutex_unlock(&bdev->bd_fsfreeze_mutex);
-		return 0;
-	}
-
-	if (sb) {
-		BUG_ON(sb->s_bdev != bdev);
-		if (!(sb->s_flags & MS_RDONLY)) {
-			if (sb->s_op->unfreeze_fs) {
-				error = sb->s_op->unfreeze_fs(sb);
-				if (error) {
-					printk(KERN_ERR
-						"VFS:Filesystem thaw failed\n");
-					sb->s_frozen = SB_FREEZE_TRANS;
-					bdev->bd_fsfreeze_count++;
-					mutex_unlock(&bdev->bd_fsfreeze_mutex);
-					return error;
-				}
-			}
-			sb->s_frozen = SB_UNFROZEN;
-			smp_wmb();
-			wake_up(&sb->s_wait_unfrozen);
-		}
-		drop_super(sb);
-	}
-
-	up(&bdev->bd_mount_sem);
-	mutex_unlock(&bdev->bd_fsfreeze_mutex);
-	return 0;
-}
-EXPORT_SYMBOL(thaw_bdev);
-
-/*
  * Various filesystems appear to want __find_get_block to be non-blocking.
  * But it's the page lock which protects the buffers. To get around this,
  * we get exclusion from try_to_free_buffers with the blockdev mapping's
@@ -344,13 +199,13 @@ __find_get_block_slow(struct block_device *bdev, sector_t block)
 	head = page_buffers(page);
 	bh = head;
 	do {
-		if (bh->b_blocknr == block) {
+		if (!buffer_mapped(bh))
+			all_mapped = 0;
+		else if (bh->b_blocknr == block) {
 			ret = bh;
 			get_bh(bh);
 			goto out_unlock;
 		}
-		if (!buffer_mapped(bh))
-			all_mapped = 0;
 		bh = bh->b_this_page;
 	} while (bh != head);
 
@@ -435,7 +290,7 @@ static void free_more_memory(void)
 						&zone);
 		if (zone)
 			try_to_free_pages(node_zonelist(nid, GFP_NOFS), 0,
-						GFP_NOFS);
+						GFP_NOFS, NULL);
 	}
 }
 
@@ -505,7 +360,7 @@ still_busy:
  * Completion handler for block_write_full_page() - pages which are unlocked
  * during I/O, and which have PageWriteback cleared upon I/O completion.
  */
-static void end_buffer_async_write(struct buffer_head *bh, int uptodate)
+void end_buffer_async_write(struct buffer_head *bh, int uptodate)
 {
 	char b[BDEVNAME_SIZE];
 	unsigned long flags;
@@ -583,11 +438,17 @@ static void mark_buffer_async_read(struct buffer_head *bh)
 	set_buffer_async_read(bh);
 }
 
-void mark_buffer_async_write(struct buffer_head *bh)
+void mark_buffer_async_write_endio(struct buffer_head *bh,
+				   bh_end_io_t *handler)
 {
-	bh->b_end_io = end_buffer_async_write;
+	bh->b_end_io = handler;
 	set_buffer_async_write(bh);
 }
+
+void mark_buffer_async_write(struct buffer_head *bh)
+{
+	mark_buffer_async_write_endio(bh, end_buffer_async_write);
+}
 EXPORT_SYMBOL(mark_buffer_async_write);
 
 
@@ -692,6 +553,46 @@ repeat:
 	return err;
 }
 
+void do_thaw_all(struct work_struct *work)
+{
+	struct super_block *sb;
+	char b[BDEVNAME_SIZE];
+
+	spin_lock(&sb_lock);
+restart:
+	list_for_each_entry(sb, &super_blocks, s_list) {
+		sb->s_count++;
+		spin_unlock(&sb_lock);
+		down_read(&sb->s_umount);
+		while (sb->s_bdev && !thaw_bdev(sb->s_bdev, sb))
+			printk(KERN_WARNING "Emergency Thaw on %s\n",
+			       bdevname(sb->s_bdev, b));
+		up_read(&sb->s_umount);
+		spin_lock(&sb_lock);
+		if (__put_super_and_need_restart(sb))
+			goto restart;
+	}
+	spin_unlock(&sb_lock);
+	kfree(work);
+	printk(KERN_WARNING "Emergency Thaw complete\n");
+}
+
+/**
+ * emergency_thaw_all -- forcibly thaw every frozen filesystem
+ *
+ * Used for emergency unfreeze of all filesystems via SysRq
+ */
+void emergency_thaw_all(void)
+{
+	struct work_struct *work;
+
+	work = kmalloc(sizeof(*work), GFP_ATOMIC);
+	if (work) {
+		INIT_WORK(work, do_thaw_all);
+		schedule_work(work);
+	}
+}
+
 /**
  * sync_mapping_buffers - write out & wait upon a mapping's "associated" buffers
  * @mapping: the mapping which wants those buffers written
@@ -766,14 +667,7 @@ static void __set_page_dirty(struct page *page,
 	spin_lock_irq(&mapping->tree_lock);
 	if (page->mapping) {	/* Race with truncate? */
 		WARN_ON_ONCE(warn && !PageUptodate(page));
-
-		if (mapping_cap_account_dirty(mapping)) {
-			__inc_zone_page_state(page, NR_FILE_DIRTY);
-			__inc_bdi_stat(mapping->backing_dev_info,
-					BDI_RECLAIMABLE);
-			task_dirty_inc(current);
-			task_io_account_write(PAGE_CACHE_SIZE);
-		}
+		account_page_dirtied(page, mapping);
 		radix_tree_tag_set(&mapping->page_tree,
 				page_index(page), PAGECACHE_TAG_DIRTY);
 	}
@@ -856,7 +750,7 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
 {
 	struct buffer_head *bh;
 	struct list_head tmp;
-	struct address_space *mapping;
+	struct address_space *mapping, *prev_mapping = NULL;
 	int err = 0, err2;
 
 	INIT_LIST_HEAD(&tmp);
@@ -881,7 +775,18 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
 				 * contents - it is a noop if I/O is still in
 				 * flight on potentially older contents.
 				 */
-				ll_rw_block(SWRITE_SYNC, 1, &bh);
+				ll_rw_block(SWRITE_SYNC_PLUG, 1, &bh);
+
+				/*
+				 * Kick off IO for the previous mapping. Note
+				 * that we will not run the very last mapping,
+				 * wait_on_buffer() will do that for us
+				 * through sync_buffer().
+				 */
+				if (prev_mapping && prev_mapping != mapping)
+					blk_run_address_space(prev_mapping);
+				prev_mapping = mapping;
+
 				brelse(bh);
 				spin_lock(lock);
 			}
@@ -1180,12 +1085,12 @@ static struct buffer_head *
 __getblk_slow(struct block_device *bdev, sector_t block, int size)
 {
 	/* Size must be multiple of hard sectorsize */
-	if (unlikely(size & (bdev_hardsect_size(bdev)-1) ||
+	if (unlikely(size & (bdev_logical_block_size(bdev)-1) ||
 			(size < 512 || size > PAGE_SIZE))) {
 		printk(KERN_ERR "getblk(): invalid block size %d requested\n",
 			size);
-		printk(KERN_ERR "hardsect size: %d\n",
-			bdev_hardsect_size(bdev));
+		printk(KERN_ERR "logical block size: %d\n",
+			bdev_logical_block_size(bdev));
 
 		dump_stack();
 		return NULL;
@@ -1260,8 +1165,11 @@ void mark_buffer_dirty(struct buffer_head *bh)
 
 	if (!test_set_buffer_dirty(bh)) {
 		struct page *page = bh->b_page;
-		if (!TestSetPageDirty(page))
-			__set_page_dirty(page, page_mapping(page), 0);
+		if (!TestSetPageDirty(page)) {
+			struct address_space *mapping = page_mapping(page);
+			if (mapping)
+				__set_page_dirty(page, mapping, 0);
+		}
 	}
 }
 
@@ -1704,9 +1612,20 @@ EXPORT_SYMBOL(unmap_underlying_metadata);
 * locked buffer. This only can happen if someone has written the buffer
 * directly, with submit_bh(). At the address_space level PageWriteback
 * prevents this contention from occurring.
+ *
+ * If block_write_full_page() is called with wbc->sync_mode ==
+ * WB_SYNC_ALL, the writes are posted using WRITE_SYNC_PLUG; this
+ * causes the writes to be flagged as synchronous writes, but the
+ * block device queue will NOT be unplugged, since usually many pages
+ * will be pushed to the out before the higher-level caller actually
+ * waits for the writes to be completed.  The various wait functions,
+ * such as wait_on_writeback_range() will ultimately call sync_page()
+ * which will ultimately call blk_run_backing_dev(), which will end up
+ * unplugging the device queue.
 */
 static int __block_write_full_page(struct inode *inode, struct page *page,
-			get_block_t *get_block, struct writeback_control *wbc)
+			get_block_t *get_block, struct writeback_control *wbc,
+			bh_end_io_t *handler)
 {
 	int err;
 	sector_t block;
@@ -1714,6 +1633,8 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
 	struct buffer_head *bh, *head;
 	const unsigned blocksize = 1 << inode->i_blkbits;
 	int nr_underway = 0;
+	int write_op = (wbc->sync_mode == WB_SYNC_ALL ?
+			WRITE_SYNC_PLUG : WRITE);
 
 	BUG_ON(!PageLocked(page));
 
@@ -1789,7 +1710,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
 			continue;
 		}
 		if (test_clear_buffer_dirty(bh)) {
-			mark_buffer_async_write(bh);
+			mark_buffer_async_write_endio(bh, handler);
 		} else {
 			unlock_buffer(bh);
 		}
@@ -1805,7 +1726,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
 	do {
 		struct buffer_head *next = bh->b_this_page;
 		if (buffer_async_write(bh)) {
-			submit_bh(WRITE, bh);
+			submit_bh(write_op, bh);
 			nr_underway++;
 		}
 		bh = next;
@@ -1842,7 +1763,7 @@ recover:
 		if (buffer_mapped(bh) && buffer_dirty(bh) &&
 		    !buffer_delay(bh)) {
 			lock_buffer(bh);
-			mark_buffer_async_write(bh);
+			mark_buffer_async_write_endio(bh, handler);
 		} else {
 			/*
 			 * The buffer may have been set dirty during
@@ -1859,7 +1780,7 @@ recover:
 		struct buffer_head *next = bh->b_this_page;
 		if (buffer_async_write(bh)) {
 			clear_buffer_dirty(bh);
-			submit_bh(WRITE, bh);
+			submit_bh(write_op, bh);
 			nr_underway++;
 		}
 		bh = next;
@@ -2465,20 +2386,22 @@ int block_commit_write(struct page *page, unsigned from, unsigned to)
 * unlock the page.
 */
 int
-block_page_mkwrite(struct vm_area_struct *vma, struct page *page,
+block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
 		   get_block_t get_block)
 {
+	struct page *page = vmf->page;
 	struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
 	unsigned long end;
 	loff_t size;
-	int ret = -EINVAL;
+	int ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */
 
 	lock_page(page);
 	size = i_size_read(inode);
 	if ((page->mapping != inode->i_mapping) ||
 	    (page_offset(page) > size)) {
 		/* page got truncated out from underneath us */
-		goto out_unlock;
+		unlock_page(page);
+		goto out;
 	}
 
 	/* page is wholly or partially inside EOF */
@@ -2491,8 +2414,16 @@ block_page_mkwrite(struct vm_area_struct *vma, struct page *page,
 	if (!ret)
 		ret = block_commit_write(page, 0, end);
 
-out_unlock:
-	unlock_page(page);
+	if (unlikely(ret)) {
+		unlock_page(page);
+		if (ret == -ENOMEM)
+			ret = VM_FAULT_OOM;
+		else /* -ENOSPC, -EIO, etc */
+			ret = VM_FAULT_SIGBUS;
+	} else
+		ret = VM_FAULT_LOCKED;
+
+out:
 	return ret;
 }
 
@@ -2760,7 +2691,8 @@ int nobh_writepage(struct page *page, get_block_t *get_block,
 out:
 	ret = mpage_writepage(page, get_block, wbc);
 	if (ret == -EAGAIN)
-		ret = __block_write_full_page(inode, page, get_block, wbc);
+		ret = __block_write_full_page(inode, page, get_block, wbc,
+					      end_buffer_async_write);
 	return ret;
 }
 EXPORT_SYMBOL(nobh_writepage);
@@ -2807,6 +2739,8 @@ has_buffers:
 		pos += blocksize;
 	}
 
+	map_bh.b_size = blocksize;
+	map_bh.b_state = 0;
 	err = get_block(inode, iblock, &map_bh, 0);
 	if (err)
 		goto unlock;
@@ -2918,9 +2852,10 @@ out:
 
 /*
 * The generic ->writepage function for buffer-backed address_spaces
+ * this form passes in the end_io handler used to finish the IO.
 */
-int block_write_full_page(struct page *page, get_block_t *get_block,
-			struct writeback_control *wbc)
+int block_write_full_page_endio(struct page *page, get_block_t *get_block,
+			struct writeback_control *wbc, bh_end_io_t *handler)
 {
 	struct inode * const inode = page->mapping->host;
 	loff_t i_size = i_size_read(inode);
@@ -2929,7 +2864,8 @@ int block_write_full_page(struct page *page, get_block_t *get_block,
 
 	/* Is the page fully inside i_size? */
 	if (page->index < end_index)
-		return __block_write_full_page(inode, page, get_block, wbc);
+		return __block_write_full_page(inode, page, get_block, wbc,
+					       handler);
 
 	/* Is the page fully outside i_size? (truncate in progress) */
 	offset = i_size & (PAGE_CACHE_SIZE-1);
@@ -2952,9 +2888,20 @@ int block_write_full_page(struct page *page, get_block_t *get_block,
 	 * writes to that region are not written out to the file."
 	 */
 	zero_user_segment(page, offset, PAGE_CACHE_SIZE);
-	return __block_write_full_page(inode, page, get_block, wbc);
+	return __block_write_full_page(inode, page, get_block, wbc, handler);
+}
+
+/*
+ * The generic ->writepage function for buffer-backed address_spaces
+ */
+int block_write_full_page(struct page *page, get_block_t *get_block,
+			struct writeback_control *wbc)
+{
+	return block_write_full_page_endio(page, get_block, wbc,
+					   end_buffer_async_write);
 }
 
+
 sector_t generic_block_bmap(struct address_space *mapping, sector_t block,
 			    get_block_t *get_block)
 {
@@ -2991,6 +2938,8 @@ int submit_bh(int rw, struct buffer_head * bh)
 	BUG_ON(!buffer_locked(bh));
 	BUG_ON(!buffer_mapped(bh));
 	BUG_ON(!bh->b_end_io);
+	BUG_ON(buffer_delay(bh));
+	BUG_ON(buffer_unwritten(bh));
 
 	/*
 	 * Mask in barrier bit for a write (could be either a WRITE or a
@@ -3067,12 +3016,13 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[])
 	for (i = 0; i < nr; i++) {
 		struct buffer_head *bh = bhs[i];
 
-		if (rw == SWRITE || rw == SWRITE_SYNC)
+		if (rw == SWRITE || rw == SWRITE_SYNC || rw == SWRITE_SYNC_PLUG)
 			lock_buffer(bh);
 		else if (!trylock_buffer(bh))
 			continue;
 
-		if (rw == WRITE || rw == SWRITE || rw == SWRITE_SYNC) {
+		if (rw == WRITE || rw == SWRITE || rw == SWRITE_SYNC ||
+		    rw == SWRITE_SYNC_PLUG) {
 			if (test_clear_buffer_dirty(bh)) {
 				bh->b_end_io = end_buffer_write_sync;
 				get_bh(bh);
@@ -3108,7 +3058,7 @@ int sync_dirty_buffer(struct buffer_head *bh)
 	if (test_clear_buffer_dirty(bh)) {
 		get_bh(bh);
 		bh->b_end_io = end_buffer_write_sync;
-		ret = submit_bh(WRITE, bh);
+		ret = submit_bh(WRITE_SYNC, bh);
 		wait_on_buffer(bh);
 		if (buffer_eopnotsupp(bh)) {
 			clear_buffer_eopnotsupp(bh);
@@ -3422,11 +3372,12 @@ EXPORT_SYMBOL(block_read_full_page);
 EXPORT_SYMBOL(block_sync_page);
 EXPORT_SYMBOL(block_truncate_page);
 EXPORT_SYMBOL(block_write_full_page);
+EXPORT_SYMBOL(block_write_full_page_endio);
 EXPORT_SYMBOL(cont_write_begin);
 EXPORT_SYMBOL(end_buffer_read_sync);
 EXPORT_SYMBOL(end_buffer_write_sync);
+EXPORT_SYMBOL(end_buffer_async_write);
 EXPORT_SYMBOL(file_fsync);
-EXPORT_SYMBOL(fsync_bdev);
 EXPORT_SYMBOL(generic_block_bmap);
 EXPORT_SYMBOL(generic_cont_expand_simple);
 EXPORT_SYMBOL(init_buffer);
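
Usage note: the newly exported block_write_full_page_endio() / end_buffer_async_write() pair lets a filesystem supply its own per-buffer completion handler from ->writepage instead of the stock one. Below is a minimal sketch of such a caller; it is not part of this patch, and all myfs_* names (including the get_block helper) are hypothetical.

#include <linux/fs.h>
#include <linux/buffer_head.h>
#include <linux/writeback.h>

/* Assumed to be provided elsewhere by the (hypothetical) filesystem. */
extern int myfs_get_block(struct inode *inode, sector_t iblock,
			  struct buffer_head *bh_result, int create);

/*
 * Per-buffer write completion: do filesystem-private bookkeeping first,
 * then defer to end_buffer_async_write(), which this patch exports.
 */
static void myfs_end_buffer_write(struct buffer_head *bh, int uptodate)
{
	/* filesystem-private accounting would go here */
	end_buffer_async_write(bh, uptodate);
}

static int myfs_writepage(struct page *page, struct writeback_control *wbc)
{
	return block_write_full_page_endio(page, myfs_get_block, wbc,
					   myfs_end_buffer_write);
}

A filesystem with no special completion needs keeps calling block_write_full_page(), which now simply wraps the _endio variant with end_buffer_async_write as the handler.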
