aboutsummaryrefslogtreecommitdiffstats
path: root/fs/fs-writeback.c
diff options
context:
space:
mode:
authorTheodore Ts'o <tytso@mit.edu>2015-03-17 12:23:19 -0400
committerTheodore Ts'o <tytso@mit.edu>2015-03-17 12:23:19 -0400
commita2f4870697a5bcf4a87073ec6b32dd2928c1211d (patch)
tree9be7de5dabd5b2f6ba96088d043b4a0c9b1cc510 /fs/fs-writeback.c
parent13a7a6ac0a11197edcd0f756a035f472b42cdf8b (diff)
fs: make sure the timestamps for lazytime inodes eventually get written
Jan Kara pointed out that if there is an inode which is constantly getting dirtied with I_DIRTY_PAGES, an inode with an updated timestamp will never be written since inode->dirtied_when is constantly getting updated. We fix this by adding an extra field to the inode, dirtied_time_when, so inodes with a stale dirtytime can get detected and handled. In addition, if we have a dirtytime inode caused by an atime update, and there is no write activity on the file system, we need to have a secondary system to make sure these inodes get written out. We do this by setting up a second delayed work structure which wakes up the CPU much more rarely compared to writeback_expire_centisecs. Signed-off-by: Theodore Ts'o <tytso@mit.edu> Reviewed-by: Jan Kara <jack@suse.cz>
Diffstat (limited to 'fs/fs-writeback.c')
-rw-r--r--fs/fs-writeback.c82
1 files changed, 72 insertions, 10 deletions
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index e907052eeadb..2cfcd74faf87 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -53,6 +53,18 @@ struct wb_writeback_work {
53 struct completion *done; /* set if the caller waits */ 53 struct completion *done; /* set if the caller waits */
54}; 54};
55 55
56/*
57 * If an inode is constantly having its pages dirtied, but then the
58 * updates stop dirtytime_expire_interval seconds in the past, it's
59 * possible for the worst case time between when an inode has its
60 * timestamps updated and when they finally get written out to be two
61 * dirtytime_expire_intervals. We set the default to 12 hours (in
62 * seconds), which means most of the time inodes will have their
63 * timestamps written to disk after 12 hours, but in the worst case a
64 * few inodes might not their timestamps updated for 24 hours.
65 */
66unsigned int dirtytime_expire_interval = 12 * 60 * 60;
67
56/** 68/**
57 * writeback_in_progress - determine whether there is writeback in progress 69 * writeback_in_progress - determine whether there is writeback in progress
58 * @bdi: the device's backing_dev_info structure. 70 * @bdi: the device's backing_dev_info structure.
@@ -275,8 +287,8 @@ static int move_expired_inodes(struct list_head *delaying_queue,
275 287
276 if ((flags & EXPIRE_DIRTY_ATIME) == 0) 288 if ((flags & EXPIRE_DIRTY_ATIME) == 0)
277 older_than_this = work->older_than_this; 289 older_than_this = work->older_than_this;
278 else if ((work->reason == WB_REASON_SYNC) == 0) { 290 else if (!work->for_sync) {
279 expire_time = jiffies - (HZ * 86400); 291 expire_time = jiffies - (dirtytime_expire_interval * HZ);
280 older_than_this = &expire_time; 292 older_than_this = &expire_time;
281 } 293 }
282 while (!list_empty(delaying_queue)) { 294 while (!list_empty(delaying_queue)) {
@@ -458,6 +470,7 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
458 */ 470 */
459 redirty_tail(inode, wb); 471 redirty_tail(inode, wb);
460 } else if (inode->i_state & I_DIRTY_TIME) { 472 } else if (inode->i_state & I_DIRTY_TIME) {
473 inode->dirtied_when = jiffies;
461 list_move(&inode->i_wb_list, &wb->b_dirty_time); 474 list_move(&inode->i_wb_list, &wb->b_dirty_time);
462 } else { 475 } else {
463 /* The inode is clean. Remove from writeback lists. */ 476 /* The inode is clean. Remove from writeback lists. */
@@ -505,12 +518,17 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
505 spin_lock(&inode->i_lock); 518 spin_lock(&inode->i_lock);
506 519
507 dirty = inode->i_state & I_DIRTY; 520 dirty = inode->i_state & I_DIRTY;
508 if (((dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) && 521 if (inode->i_state & I_DIRTY_TIME) {
509 (inode->i_state & I_DIRTY_TIME)) || 522 if ((dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) ||
510 (inode->i_state & I_DIRTY_TIME_EXPIRED)) { 523 unlikely(inode->i_state & I_DIRTY_TIME_EXPIRED) ||
511 dirty |= I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED; 524 unlikely(time_after(jiffies,
512 trace_writeback_lazytime(inode); 525 (inode->dirtied_time_when +
513 } 526 dirtytime_expire_interval * HZ)))) {
527 dirty |= I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED;
528 trace_writeback_lazytime(inode);
529 }
530 } else
531 inode->i_state &= ~I_DIRTY_TIME_EXPIRED;
514 inode->i_state &= ~dirty; 532 inode->i_state &= ~dirty;
515 533
516 /* 534 /*
@@ -1131,6 +1149,45 @@ void wakeup_flusher_threads(long nr_pages, enum wb_reason reason)
1131 rcu_read_unlock(); 1149 rcu_read_unlock();
1132} 1150}
1133 1151
1152/*
1153 * Wake up bdi's periodically to make sure dirtytime inodes gets
1154 * written back periodically. We deliberately do *not* check the
1155 * b_dirtytime list in wb_has_dirty_io(), since this would cause the
1156 * kernel to be constantly waking up once there are any dirtytime
1157 * inodes on the system. So instead we define a separate delayed work
1158 * function which gets called much more rarely. (By default, only
1159 * once every 12 hours.)
1160 *
1161 * If there is any other write activity going on in the file system,
1162 * this function won't be necessary. But if the only thing that has
1163 * happened on the file system is a dirtytime inode caused by an atime
1164 * update, we need this infrastructure below to make sure that inode
1165 * eventually gets pushed out to disk.
1166 */
1167static void wakeup_dirtytime_writeback(struct work_struct *w);
1168static DECLARE_DELAYED_WORK(dirtytime_work, wakeup_dirtytime_writeback);
1169
1170static void wakeup_dirtytime_writeback(struct work_struct *w)
1171{
1172 struct backing_dev_info *bdi;
1173
1174 rcu_read_lock();
1175 list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
1176 if (list_empty(&bdi->wb.b_dirty_time))
1177 continue;
1178 bdi_wakeup_thread(bdi);
1179 }
1180 rcu_read_unlock();
1181 schedule_delayed_work(&dirtytime_work, dirtytime_expire_interval * HZ);
1182}
1183
1184static int __init start_dirtytime_writeback(void)
1185{
1186 schedule_delayed_work(&dirtytime_work, dirtytime_expire_interval * HZ);
1187 return 0;
1188}
1189__initcall(start_dirtytime_writeback);
1190
1134static noinline void block_dump___mark_inode_dirty(struct inode *inode) 1191static noinline void block_dump___mark_inode_dirty(struct inode *inode)
1135{ 1192{
1136 if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) { 1193 if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) {
@@ -1269,8 +1326,13 @@ void __mark_inode_dirty(struct inode *inode, int flags)
1269 } 1326 }
1270 1327
1271 inode->dirtied_when = jiffies; 1328 inode->dirtied_when = jiffies;
1272 list_move(&inode->i_wb_list, dirtytime ? 1329 if (dirtytime)
1273 &bdi->wb.b_dirty_time : &bdi->wb.b_dirty); 1330 inode->dirtied_time_when = jiffies;
1331 if (inode->i_state & (I_DIRTY_INODE | I_DIRTY_PAGES))
1332 list_move(&inode->i_wb_list, &bdi->wb.b_dirty);
1333 else
1334 list_move(&inode->i_wb_list,
1335 &bdi->wb.b_dirty_time);
1274 spin_unlock(&bdi->wb.list_lock); 1336 spin_unlock(&bdi->wb.list_lock);
1275 trace_writeback_dirty_inode_enqueue(inode); 1337 trace_writeback_dirty_inode_enqueue(inode);
1276 1338