diff options
author | Theodore Ts'o <tytso@mit.edu> | 2015-03-17 12:23:19 -0400 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2015-03-17 12:23:19 -0400 |
commit | a2f4870697a5bcf4a87073ec6b32dd2928c1211d (patch) | |
tree | 9be7de5dabd5b2f6ba96088d043b4a0c9b1cc510 /fs/fs-writeback.c | |
parent | 13a7a6ac0a11197edcd0f756a035f472b42cdf8b (diff) |
fs: make sure the timestamps for lazytime inodes eventually get written
Jan Kara pointed out that if there is an inode which is constantly
getting dirtied with I_DIRTY_PAGES, an inode with an updated timestamp
will never be written since inode->dirtied_when is constantly getting
updated. We fix this by adding an extra field to the inode,
dirtied_time_when, so inodes with a stale dirtytime can get detected
and handled.
In addition, if we have a dirtytime inode caused by an atime update,
and there is no write activity on the file system, we need to have a
secondary system to make sure these inodes get written out. We do
this by setting up a second delayed work structure which wakes up the
CPU much more rarely compared to writeback_expire_centisecs.
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Jan Kara <jack@suse.cz>
Diffstat (limited to 'fs/fs-writeback.c')
-rw-r--r-- | fs/fs-writeback.c | 82 |
1 files changed, 72 insertions, 10 deletions
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index e907052eeadb..2cfcd74faf87 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
@@ -53,6 +53,18 @@ struct wb_writeback_work { | |||
53 | struct completion *done; /* set if the caller waits */ | 53 | struct completion *done; /* set if the caller waits */ |
54 | }; | 54 | }; |
55 | 55 | ||
56 | /* | ||
57 | * If an inode is constantly having its pages dirtied, but then the | ||
58 | * updates stop dirtytime_expire_interval seconds in the past, it's | ||
59 | * possible for the worst case time between when an inode has its | ||
60 | * timestamps updated and when they finally get written out to be two | ||
61 | * dirtytime_expire_intervals. We set the default to 12 hours (in | ||
62 | * seconds), which means most of the time inodes will have their | ||
63 | * timestamps written to disk after 12 hours, but in the worst case a | ||
64 | * few inodes might not have their timestamps updated for 24 hours. ||
65 | */ | ||
66 | unsigned int dirtytime_expire_interval = 12 * 60 * 60; | ||
67 | |||
56 | /** | 68 | /** |
57 | * writeback_in_progress - determine whether there is writeback in progress | 69 | * writeback_in_progress - determine whether there is writeback in progress |
58 | * @bdi: the device's backing_dev_info structure. | 70 | * @bdi: the device's backing_dev_info structure. |
@@ -275,8 +287,8 @@ static int move_expired_inodes(struct list_head *delaying_queue, | |||
275 | 287 | ||
276 | if ((flags & EXPIRE_DIRTY_ATIME) == 0) | 288 | if ((flags & EXPIRE_DIRTY_ATIME) == 0) |
277 | older_than_this = work->older_than_this; | 289 | older_than_this = work->older_than_this; |
278 | else if ((work->reason == WB_REASON_SYNC) == 0) { | 290 | else if (!work->for_sync) { |
279 | expire_time = jiffies - (HZ * 86400); | 291 | expire_time = jiffies - (dirtytime_expire_interval * HZ); |
280 | older_than_this = &expire_time; | 292 | older_than_this = &expire_time; |
281 | } | 293 | } |
282 | while (!list_empty(delaying_queue)) { | 294 | while (!list_empty(delaying_queue)) { |
@@ -458,6 +470,7 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb, | |||
458 | */ | 470 | */ |
459 | redirty_tail(inode, wb); | 471 | redirty_tail(inode, wb); |
460 | } else if (inode->i_state & I_DIRTY_TIME) { | 472 | } else if (inode->i_state & I_DIRTY_TIME) { |
473 | inode->dirtied_when = jiffies; | ||
461 | list_move(&inode->i_wb_list, &wb->b_dirty_time); | 474 | list_move(&inode->i_wb_list, &wb->b_dirty_time); |
462 | } else { | 475 | } else { |
463 | /* The inode is clean. Remove from writeback lists. */ | 476 | /* The inode is clean. Remove from writeback lists. */ |
@@ -505,12 +518,17 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
505 | spin_lock(&inode->i_lock); | 518 | spin_lock(&inode->i_lock); |
506 | 519 | ||
507 | dirty = inode->i_state & I_DIRTY; | 520 | dirty = inode->i_state & I_DIRTY; |
508 | if (((dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) && | 521 | if (inode->i_state & I_DIRTY_TIME) { |
509 | (inode->i_state & I_DIRTY_TIME)) || | 522 | if ((dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) || |
510 | (inode->i_state & I_DIRTY_TIME_EXPIRED)) { | 523 | unlikely(inode->i_state & I_DIRTY_TIME_EXPIRED) || |
511 | dirty |= I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED; | 524 | unlikely(time_after(jiffies, |
512 | trace_writeback_lazytime(inode); | 525 | (inode->dirtied_time_when + |
513 | } | 526 | dirtytime_expire_interval * HZ)))) { |
527 | dirty |= I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED; | ||
528 | trace_writeback_lazytime(inode); | ||
529 | } | ||
530 | } else | ||
531 | inode->i_state &= ~I_DIRTY_TIME_EXPIRED; | ||
514 | inode->i_state &= ~dirty; | 532 | inode->i_state &= ~dirty; |
515 | 533 | ||
516 | /* | 534 | /* |
@@ -1131,6 +1149,45 @@ void wakeup_flusher_threads(long nr_pages, enum wb_reason reason) | |||
1131 | rcu_read_unlock(); | 1149 | rcu_read_unlock(); |
1132 | } | 1150 | } |
1133 | 1151 | ||
1152 | /* | ||
1153 | * Wake up bdi's periodically to make sure dirtytime inodes get ||
1154 | * written back periodically. We deliberately do *not* check the | ||
1155 | * b_dirtytime list in wb_has_dirty_io(), since this would cause the | ||
1156 | * kernel to be constantly waking up once there are any dirtytime | ||
1157 | * inodes on the system. So instead we define a separate delayed work | ||
1158 | * function which gets called much more rarely. (By default, only | ||
1159 | * once every 12 hours.) | ||
1160 | * | ||
1161 | * If there is any other write activity going on in the file system, | ||
1162 | * this function won't be necessary. But if the only thing that has | ||
1163 | * happened on the file system is a dirtytime inode caused by an atime | ||
1164 | * update, we need this infrastructure below to make sure that inode | ||
1165 | * eventually gets pushed out to disk. | ||
1166 | */ | ||
1167 | static void wakeup_dirtytime_writeback(struct work_struct *w); | ||
1168 | static DECLARE_DELAYED_WORK(dirtytime_work, wakeup_dirtytime_writeback); | ||
1169 | |||
1170 | static void wakeup_dirtytime_writeback(struct work_struct *w) | ||
1171 | { | ||
1172 | struct backing_dev_info *bdi; | ||
1173 | |||
1174 | rcu_read_lock(); | ||
1175 | list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) { | ||
1176 | if (list_empty(&bdi->wb.b_dirty_time)) | ||
1177 | continue; | ||
1178 | bdi_wakeup_thread(bdi); | ||
1179 | } | ||
1180 | rcu_read_unlock(); | ||
1181 | schedule_delayed_work(&dirtytime_work, dirtytime_expire_interval * HZ); | ||
1182 | } | ||
1183 | |||
1184 | static int __init start_dirtytime_writeback(void) | ||
1185 | { | ||
1186 | schedule_delayed_work(&dirtytime_work, dirtytime_expire_interval * HZ); | ||
1187 | return 0; | ||
1188 | } | ||
1189 | __initcall(start_dirtytime_writeback); | ||
1190 | |||
1134 | static noinline void block_dump___mark_inode_dirty(struct inode *inode) | 1191 | static noinline void block_dump___mark_inode_dirty(struct inode *inode) |
1135 | { | 1192 | { |
1136 | if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) { | 1193 | if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) { |
@@ -1269,8 +1326,13 @@ void __mark_inode_dirty(struct inode *inode, int flags) | |||
1269 | } | 1326 | } |
1270 | 1327 | ||
1271 | inode->dirtied_when = jiffies; | 1328 | inode->dirtied_when = jiffies; |
1272 | list_move(&inode->i_wb_list, dirtytime ? | 1329 | if (dirtytime) |
1273 | &bdi->wb.b_dirty_time : &bdi->wb.b_dirty); | 1330 | inode->dirtied_time_when = jiffies; |
1331 | if (inode->i_state & (I_DIRTY_INODE | I_DIRTY_PAGES)) | ||
1332 | list_move(&inode->i_wb_list, &bdi->wb.b_dirty); | ||
1333 | else | ||
1334 | list_move(&inode->i_wb_list, | ||
1335 | &bdi->wb.b_dirty_time); | ||
1274 | spin_unlock(&bdi->wb.list_lock); | 1336 | spin_unlock(&bdi->wb.list_lock); |
1275 | trace_writeback_dirty_inode_enqueue(inode); | 1337 | trace_writeback_dirty_inode_enqueue(inode); |
1276 | 1338 | ||