aboutsummaryrefslogtreecommitdiffstats
path: root/fs/fs-writeback.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/fs-writeback.c')
-rw-r--r--fs/fs-writeback.c198
1 files changed, 77 insertions, 121 deletions
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 91013ff7dd53..c54226be5294 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -64,6 +64,28 @@ static void writeback_release(struct backing_dev_info *bdi)
64 clear_bit(BDI_pdflush, &bdi->state); 64 clear_bit(BDI_pdflush, &bdi->state);
65} 65}
66 66
67static noinline void block_dump___mark_inode_dirty(struct inode *inode)
68{
69 if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) {
70 struct dentry *dentry;
71 const char *name = "?";
72
73 dentry = d_find_alias(inode);
74 if (dentry) {
75 spin_lock(&dentry->d_lock);
76 name = (const char *) dentry->d_name.name;
77 }
78 printk(KERN_DEBUG
79 "%s(%d): dirtied inode %lu (%s) on %s\n",
80 current->comm, task_pid_nr(current), inode->i_ino,
81 name, inode->i_sb->s_id);
82 if (dentry) {
83 spin_unlock(&dentry->d_lock);
84 dput(dentry);
85 }
86 }
87}
88
67/** 89/**
68 * __mark_inode_dirty - internal function 90 * __mark_inode_dirty - internal function
69 * @inode: inode to mark 91 * @inode: inode to mark
@@ -114,23 +136,8 @@ void __mark_inode_dirty(struct inode *inode, int flags)
114 if ((inode->i_state & flags) == flags) 136 if ((inode->i_state & flags) == flags)
115 return; 137 return;
116 138
117 if (unlikely(block_dump)) { 139 if (unlikely(block_dump))
118 struct dentry *dentry = NULL; 140 block_dump___mark_inode_dirty(inode);
119 const char *name = "?";
120
121 if (!list_empty(&inode->i_dentry)) {
122 dentry = list_entry(inode->i_dentry.next,
123 struct dentry, d_alias);
124 if (dentry && dentry->d_name.name)
125 name = (const char *) dentry->d_name.name;
126 }
127
128 if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev"))
129 printk(KERN_DEBUG
130 "%s(%d): dirtied inode %lu (%s) on %s\n",
131 current->comm, task_pid_nr(current), inode->i_ino,
132 name, inode->i_sb->s_id);
133 }
134 141
135 spin_lock(&inode_lock); 142 spin_lock(&inode_lock);
136 if ((inode->i_state & flags) != flags) { 143 if ((inode->i_state & flags) != flags) {
@@ -271,7 +278,26 @@ int sb_has_dirty_inodes(struct super_block *sb)
271EXPORT_SYMBOL(sb_has_dirty_inodes); 278EXPORT_SYMBOL(sb_has_dirty_inodes);
272 279
273/* 280/*
274 * Write a single inode's dirty pages and inode data out to disk. 281 * Wait for writeback on an inode to complete.
282 */
283static void inode_wait_for_writeback(struct inode *inode)
284{
285 DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC);
286 wait_queue_head_t *wqh;
287
288 wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
289 do {
290 spin_unlock(&inode_lock);
291 __wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE);
292 spin_lock(&inode_lock);
293 } while (inode->i_state & I_SYNC);
294}
295
296/*
297 * Write out an inode's dirty pages. Called under inode_lock. Either the
298 * caller has ref on the inode (either via __iget or via syscall against an fd)
299 * or the inode has I_WILL_FREE set (via generic_forget_inode)
300 *
275 * If `wait' is set, wait on the writeout. 301 * If `wait' is set, wait on the writeout.
276 * 302 *
277 * The whole writeout design is quite complex and fragile. We want to avoid 303 * The whole writeout design is quite complex and fragile. We want to avoid
@@ -281,15 +307,39 @@ EXPORT_SYMBOL(sb_has_dirty_inodes);
281 * Called under inode_lock. 307 * Called under inode_lock.
282 */ 308 */
283static int 309static int
284__sync_single_inode(struct inode *inode, struct writeback_control *wbc) 310writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
285{ 311{
286 unsigned dirty;
287 struct address_space *mapping = inode->i_mapping; 312 struct address_space *mapping = inode->i_mapping;
288 int wait = wbc->sync_mode == WB_SYNC_ALL; 313 int wait = wbc->sync_mode == WB_SYNC_ALL;
314 unsigned dirty;
289 int ret; 315 int ret;
290 316
317 if (!atomic_read(&inode->i_count))
318 WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING)));
319 else
320 WARN_ON(inode->i_state & I_WILL_FREE);
321
322 if (inode->i_state & I_SYNC) {
323 /*
324 * If this inode is locked for writeback and we are not doing
325 * writeback-for-data-integrity, move it to s_more_io so that
326 * writeback can proceed with the other inodes on s_io.
327 *
328 * We'll have another go at writing back this inode when we
329 * completed a full scan of s_io.
330 */
331 if (!wait) {
332 requeue_io(inode);
333 return 0;
334 }
335
336 /*
337 * It's a data-integrity sync. We must wait.
338 */
339 inode_wait_for_writeback(inode);
340 }
341
291 BUG_ON(inode->i_state & I_SYNC); 342 BUG_ON(inode->i_state & I_SYNC);
292 WARN_ON(inode->i_state & I_NEW);
293 343
294 /* Set I_SYNC, reset I_DIRTY */ 344 /* Set I_SYNC, reset I_DIRTY */
295 dirty = inode->i_state & I_DIRTY; 345 dirty = inode->i_state & I_DIRTY;
@@ -314,9 +364,8 @@ __sync_single_inode(struct inode *inode, struct writeback_control *wbc)
314 } 364 }
315 365
316 spin_lock(&inode_lock); 366 spin_lock(&inode_lock);
317 WARN_ON(inode->i_state & I_NEW);
318 inode->i_state &= ~I_SYNC; 367 inode->i_state &= ~I_SYNC;
319 if (!(inode->i_state & I_FREEING)) { 368 if (!(inode->i_state & (I_FREEING | I_CLEAR))) {
320 if (!(inode->i_state & I_DIRTY) && 369 if (!(inode->i_state & I_DIRTY) &&
321 mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { 370 mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
322 /* 371 /*
@@ -385,50 +434,6 @@ __sync_single_inode(struct inode *inode, struct writeback_control *wbc)
385} 434}
386 435
387/* 436/*
388 * Write out an inode's dirty pages. Called under inode_lock. Either the
389 * caller has ref on the inode (either via __iget or via syscall against an fd)
390 * or the inode has I_WILL_FREE set (via generic_forget_inode)
391 */
392static int
393__writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
394{
395 wait_queue_head_t *wqh;
396
397 if (!atomic_read(&inode->i_count))
398 WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING)));
399 else
400 WARN_ON(inode->i_state & I_WILL_FREE);
401
402 if ((wbc->sync_mode != WB_SYNC_ALL) && (inode->i_state & I_SYNC)) {
403 /*
404 * We're skipping this inode because it's locked, and we're not
405 * doing writeback-for-data-integrity. Move it to s_more_io so
406 * that writeback can proceed with the other inodes on s_io.
407 * We'll have another go at writing back this inode when we
408 * completed a full scan of s_io.
409 */
410 requeue_io(inode);
411 return 0;
412 }
413
414 /*
415 * It's a data-integrity sync. We must wait.
416 */
417 if (inode->i_state & I_SYNC) {
418 DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC);
419
420 wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
421 do {
422 spin_unlock(&inode_lock);
423 __wait_on_bit(wqh, &wq, inode_wait,
424 TASK_UNINTERRUPTIBLE);
425 spin_lock(&inode_lock);
426 } while (inode->i_state & I_SYNC);
427 }
428 return __sync_single_inode(inode, wbc);
429}
430
431/*
432 * Write out a superblock's list of dirty inodes. A wait will be performed 437 * Write out a superblock's list of dirty inodes. A wait will be performed
433 * upon no inodes, all inodes or the final one, depending upon sync_mode. 438 * upon no inodes, all inodes or the final one, depending upon sync_mode.
434 * 439 *
@@ -487,7 +492,7 @@ void generic_sync_sb_inodes(struct super_block *sb,
487 break; 492 break;
488 } 493 }
489 494
490 if (inode->i_state & I_NEW) { 495 if (inode->i_state & (I_NEW | I_WILL_FREE)) {
491 requeue_io(inode); 496 requeue_io(inode);
492 continue; 497 continue;
493 } 498 }
@@ -518,10 +523,10 @@ void generic_sync_sb_inodes(struct super_block *sb,
518 if (current_is_pdflush() && !writeback_acquire(bdi)) 523 if (current_is_pdflush() && !writeback_acquire(bdi))
519 break; 524 break;
520 525
521 BUG_ON(inode->i_state & I_FREEING); 526 BUG_ON(inode->i_state & (I_FREEING | I_CLEAR));
522 __iget(inode); 527 __iget(inode);
523 pages_skipped = wbc->pages_skipped; 528 pages_skipped = wbc->pages_skipped;
524 __writeback_single_inode(inode, wbc); 529 writeback_single_inode(inode, wbc);
525 if (current_is_pdflush()) 530 if (current_is_pdflush())
526 writeback_release(bdi); 531 writeback_release(bdi);
527 if (wbc->pages_skipped != pages_skipped) { 532 if (wbc->pages_skipped != pages_skipped) {
@@ -679,55 +684,6 @@ void sync_inodes_sb(struct super_block *sb, int wait)
679} 684}
680 685
681/** 686/**
682 * sync_inodes - writes all inodes to disk
683 * @wait: wait for completion
684 *
685 * sync_inodes() goes through each super block's dirty inode list, writes the
686 * inodes out, waits on the writeout and puts the inodes back on the normal
687 * list.
688 *
689 * This is for sys_sync(). fsync_dev() uses the same algorithm. The subtle
690 * part of the sync functions is that the blockdev "superblock" is processed
691 * last. This is because the write_inode() function of a typical fs will
692 * perform no I/O, but will mark buffers in the blockdev mapping as dirty.
693 * What we want to do is to perform all that dirtying first, and then write
694 * back all those inode blocks via the blockdev mapping in one sweep. So the
695 * additional (somewhat redundant) sync_blockdev() calls here are to make
696 * sure that really happens. Because if we call sync_inodes_sb(wait=1) with
697 * outstanding dirty inodes, the writeback goes block-at-a-time within the
698 * filesystem's write_inode(). This is extremely slow.
699 */
700static void __sync_inodes(int wait)
701{
702 struct super_block *sb;
703
704 spin_lock(&sb_lock);
705restart:
706 list_for_each_entry(sb, &super_blocks, s_list) {
707 sb->s_count++;
708 spin_unlock(&sb_lock);
709 down_read(&sb->s_umount);
710 if (sb->s_root) {
711 sync_inodes_sb(sb, wait);
712 sync_blockdev(sb->s_bdev);
713 }
714 up_read(&sb->s_umount);
715 spin_lock(&sb_lock);
716 if (__put_super_and_need_restart(sb))
717 goto restart;
718 }
719 spin_unlock(&sb_lock);
720}
721
722void sync_inodes(int wait)
723{
724 __sync_inodes(0);
725
726 if (wait)
727 __sync_inodes(1);
728}
729
730/**
731 * write_inode_now - write an inode to disk 687 * write_inode_now - write an inode to disk
732 * @inode: inode to write to disk 688 * @inode: inode to write to disk
733 * @sync: whether the write should be synchronous or not 689 * @sync: whether the write should be synchronous or not
@@ -752,7 +708,7 @@ int write_inode_now(struct inode *inode, int sync)
752 708
753 might_sleep(); 709 might_sleep();
754 spin_lock(&inode_lock); 710 spin_lock(&inode_lock);
755 ret = __writeback_single_inode(inode, &wbc); 711 ret = writeback_single_inode(inode, &wbc);
756 spin_unlock(&inode_lock); 712 spin_unlock(&inode_lock);
757 if (sync) 713 if (sync)
758 inode_sync_wait(inode); 714 inode_sync_wait(inode);
@@ -776,7 +732,7 @@ int sync_inode(struct inode *inode, struct writeback_control *wbc)
776 int ret; 732 int ret;
777 733
778 spin_lock(&inode_lock); 734 spin_lock(&inode_lock);
779 ret = __writeback_single_inode(inode, wbc); 735 ret = writeback_single_inode(inode, wbc);
780 spin_unlock(&inode_lock); 736 spin_unlock(&inode_lock);
781 return ret; 737 return ret;
782} 738}