author      Linus Torvalds <torvalds@linux-foundation.org>  2015-04-01 13:05:42 -0400
committer   Linus Torvalds <torvalds@linux-foundation.org>  2015-04-01 13:05:42 -0400
commit      b6c3a5946c60ed2e3da33443a7db8f000ba108c5 (patch)
tree        3c3dd281752222aa8125f0c01add6067a8920f15 /fs
parent      1e848913f017cb84256ef5fe4cdd5cc4b935bd36 (diff)
parent      1efff914afac8a965ad63817ecf8861a927c2ace (diff)

Merge tag 'lazytime_fix' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull lazytime fixes from Ted Ts'o:
 "This fixes a problem in the lazy time patches, which can cause
  frequently updated inodes to never have their timestamps updated.
  These changes guarantee that no timestamp on disk will be stale by
  more than 24 hours"

* tag 'lazytime_fix' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
  fs: add dirtytime_expire_seconds sysctl
  fs: make sure the timestamps for lazytime inodes eventually get written

Diffstat (limited to 'fs')
 fs/fs-writeback.c | 93 ++++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 83 insertions(+), 10 deletions(-)
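
The message above mentions the new dirtytime_expire_seconds sysctl. As a
minimal userspace sketch, assuming the knob lands at
/proc/sys/fs/dirtytime_expire_seconds (the path is inferred from the
sysctl's name, not shown on this page), reading and lowering the interval
might look like:

	#include <stdio.h>

	int main(void)
	{
		const char *path = "/proc/sys/fs/dirtytime_expire_seconds";
		char buf[32];
		FILE *f = fopen(path, "r");

		if (!f || !fgets(buf, sizeof(buf), f)) {
			perror(path);
			return 1;
		}
		fclose(f);
		printf("current interval: %s", buf);	/* default 43200 = 12h */

		/* Lowering the value requires root; per the handler in the
		 * diff below, a successful write also re-arms the delayed
		 * work immediately. */
		f = fopen(path, "w");
		if (!f || fprintf(f, "3600\n") < 0) {
			perror(path);
			return 1;
		}
		fclose(f);
		return 0;
	}

Writing a new value takes effect at once: dirtytime_interval_handler in the
diff calls mod_delayed_work(system_wq, &dirtytime_work, 0) on any successful
write.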
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index e907052eeadb..32a8bbd7a9ad 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -53,6 +53,18 @@ struct wb_writeback_work {
 	struct completion *done;	/* set if the caller waits */
 };
 
+/*
+ * If an inode is constantly having its pages dirtied, but then the
+ * updates stop dirtytime_expire_interval seconds in the past, it's
+ * possible for the worst case time between when an inode has its
+ * timestamps updated and when they finally get written out to be two
+ * dirtytime_expire_intervals.  We set the default to 12 hours (in
+ * seconds), which means most of the time inodes will have their
+ * timestamps written to disk after 12 hours, but in the worst case a
+ * few inodes might not have their timestamps updated for 24 hours.
+ */
+unsigned int dirtytime_expire_interval = 12 * 60 * 60;
+
 /**
  * writeback_in_progress - determine whether there is writeback in progress
  * @bdi: the device's backing_dev_info structure.
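
The comment's two-interval bound is easy to sanity-check. In this minimal
standalone sketch (hypothetical names, seconds in place of jiffies), an
inode dirtied just after a sweep is skipped by the next sweep, because it
is not yet dirtytime_expire_interval old, and is only written by the one
after that:

	#include <assert.h>
	#include <stdio.h>

	#define INTERVAL (12 * 60 * 60)	/* stands in for dirtytime_expire_interval */

	int main(void)
	{
		long dirtied = 1;	/* dirtied just after the sweep at t = 0 */
		long written = -1;

		/* Sweeps run every INTERVAL and only write inodes whose
		 * dirty time is at least INTERVAL old (the cutoff computed
		 * in move_expired_inodes below). */
		for (long sweep = INTERVAL; written < 0; sweep += INTERVAL)
			if (sweep - dirtied >= INTERVAL)
				written = sweep;

		printf("stale for %ld s (~%ld h)\n",
		       written - dirtied, (written - dirtied) / 3600);
		assert(written - dirtied <= 2 * INTERVAL);	/* the 24h bound */
		return 0;
	}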
@@ -275,8 +287,8 @@ static int move_expired_inodes(struct list_head *delaying_queue,
 
 	if ((flags & EXPIRE_DIRTY_ATIME) == 0)
 		older_than_this = work->older_than_this;
-	else if ((work->reason == WB_REASON_SYNC) == 0) {
-		expire_time = jiffies - (HZ * 86400);
+	else if (!work->for_sync) {
+		expire_time = jiffies - (dirtytime_expire_interval * HZ);
 		older_than_this = &expire_time;
 	}
 	while (!list_empty(delaying_queue)) {
@@ -458,6 +470,7 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
 		 */
 		redirty_tail(inode, wb);
 	} else if (inode->i_state & I_DIRTY_TIME) {
+		inode->dirtied_when = jiffies;
 		list_move(&inode->i_wb_list, &wb->b_dirty_time);
 	} else {
 		/* The inode is clean.  Remove from writeback lists. */
@@ -505,12 +518,17 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 	spin_lock(&inode->i_lock);
 
 	dirty = inode->i_state & I_DIRTY;
-	if (((dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) &&
-	     (inode->i_state & I_DIRTY_TIME)) ||
-	    (inode->i_state & I_DIRTY_TIME_EXPIRED)) {
-		dirty |= I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED;
-		trace_writeback_lazytime(inode);
-	}
+	if (inode->i_state & I_DIRTY_TIME) {
+		if ((dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) ||
+		    unlikely(inode->i_state & I_DIRTY_TIME_EXPIRED) ||
+		    unlikely(time_after(jiffies,
+					(inode->dirtied_time_when +
+					 dirtytime_expire_interval * HZ)))) {
+			dirty |= I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED;
+			trace_writeback_lazytime(inode);
+		}
+	} else
+		inode->i_state &= ~I_DIRTY_TIME_EXPIRED;
 	inode->i_state &= ~dirty;
 
 	/*
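
The expiry test relies on time_after() so the comparison stays correct when
jiffies wraps around. The macro's signed-difference trick (the same idea as
in include/linux/jiffies.h, minus the type checking) can be demonstrated in
isolation:

	#include <assert.h>
	#include <limits.h>

	/* Compare via signed difference so the test survives unsigned
	 * wraparound, as the kernel's jiffies helpers do. */
	#define time_after(a, b)	((long)((b) - (a)) < 0)

	int main(void)
	{
		unsigned long dirtied = ULONG_MAX - 10;	/* just before wrap */
		unsigned long now = dirtied + 100;	/* wraps to a small value */

		/* A naive '>' gets this wrong; the signed trick does not. */
		assert(!(now > dirtied));
		assert(time_after(now, dirtied));
		return 0;
	}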
@@ -1131,6 +1149,56 @@ void wakeup_flusher_threads(long nr_pages, enum wb_reason reason)
 	rcu_read_unlock();
 }
 
+/*
+ * Wake up bdi's periodically to make sure dirtytime inodes get
+ * written back periodically.  We deliberately do *not* check the
+ * b_dirtytime list in wb_has_dirty_io(), since this would cause the
+ * kernel to be constantly waking up once there are any dirtytime
+ * inodes on the system.  So instead we define a separate delayed work
+ * function which gets called much more rarely.  (By default, only
+ * once every 12 hours.)
+ *
+ * If there is any other write activity going on in the file system,
+ * this function won't be necessary.  But if the only thing that has
+ * happened on the file system is a dirtytime inode caused by an atime
+ * update, we need this infrastructure below to make sure that inode
+ * eventually gets pushed out to disk.
+ */
+static void wakeup_dirtytime_writeback(struct work_struct *w);
+static DECLARE_DELAYED_WORK(dirtytime_work, wakeup_dirtytime_writeback);
+
+static void wakeup_dirtytime_writeback(struct work_struct *w)
+{
+	struct backing_dev_info *bdi;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
+		if (list_empty(&bdi->wb.b_dirty_time))
+			continue;
+		bdi_wakeup_thread(bdi);
+	}
+	rcu_read_unlock();
+	schedule_delayed_work(&dirtytime_work, dirtytime_expire_interval * HZ);
+}
+
+static int __init start_dirtytime_writeback(void)
+{
+	schedule_delayed_work(&dirtytime_work, dirtytime_expire_interval * HZ);
+	return 0;
+}
+__initcall(start_dirtytime_writeback);
+
+int dirtytime_interval_handler(struct ctl_table *table, int write,
+			       void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	int ret;
+
+	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+	if (ret == 0 && write)
+		mod_delayed_work(system_wq, &dirtytime_work, 0);
+	return ret;
+}
+
 static noinline void block_dump___mark_inode_dirty(struct inode *inode)
 {
 	if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) {
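
Stripped of the writeback specifics, the self-rearming delayed-work pattern
the comment above describes looks roughly like the following module sketch;
all names here are hypothetical and not part of this commit:

	#include <linux/module.h>
	#include <linux/workqueue.h>
	#include <linux/jiffies.h>

	/* Hypothetical interval; the real code uses dirtytime_expire_interval. */
	static unsigned long demo_interval = 60 * HZ;

	static void demo_fn(struct work_struct *w);
	static DECLARE_DELAYED_WORK(demo_work, demo_fn);

	static void demo_fn(struct work_struct *w)
	{
		pr_info("periodic pass\n");	/* do the rare periodic scan here */
		/* Re-arm ourselves: this is what keeps the work running. */
		schedule_delayed_work(&demo_work, demo_interval);
	}

	static int __init demo_init(void)
	{
		schedule_delayed_work(&demo_work, demo_interval);
		return 0;
	}

	static void __exit demo_exit(void)
	{
		cancel_delayed_work_sync(&demo_work);	/* stop the re-arm loop */
	}

	module_init(demo_init);
	module_exit(demo_exit);
	MODULE_LICENSE("GPL");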
@@ -1269,8 +1337,13 @@ void __mark_inode_dirty(struct inode *inode, int flags)
 			}
 
 			inode->dirtied_when = jiffies;
-			list_move(&inode->i_wb_list, dirtytime ?
-				  &bdi->wb.b_dirty_time : &bdi->wb.b_dirty);
+			if (dirtytime)
+				inode->dirtied_time_when = jiffies;
+			if (inode->i_state & (I_DIRTY_INODE | I_DIRTY_PAGES))
+				list_move(&inode->i_wb_list, &bdi->wb.b_dirty);
+			else
+				list_move(&inode->i_wb_list,
+					  &bdi->wb.b_dirty_time);
 			spin_unlock(&bdi->wb.list_lock);
 			trace_writeback_dirty_inode_enqueue(inode);
 