Diffstat (limited to 'fs/fs-writeback.c')
-rw-r--r-- | fs/fs-writeback.c | 93
1 file changed, 83 insertions, 10 deletions
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index e907052eeadb..32a8bbd7a9ad 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -53,6 +53,18 @@ struct wb_writeback_work { | |||
53 | struct completion *done; /* set if the caller waits */ | 53 | struct completion *done; /* set if the caller waits */ |
54 | }; | 54 | }; |
55 | 55 | ||
56 | /* | ||
57 | * If an inode is constantly having its pages dirtied, but then the | ||
58 | * updates stop dirtytime_expire_interval seconds in the past, it's | ||
59 | * possible for the worst case time between when an inode has its | ||
60 | * timestamps updated and when they finally get written out to be two | ||
61 | * dirtytime_expire_intervals. We set the default to 12 hours (in | ||
62 | * seconds), which means most of the time inodes will have their | ||
63 | * timestamps written to disk after 12 hours, but in the worst case a | ||
64 | few inodes might not have their timestamps updated for 24 hours. | ||
65 | */ | ||
66 | unsigned int dirtytime_expire_interval = 12 * 60 * 60; | ||
67 | |||
56 | /** | 68 | /** |
57 | * writeback_in_progress - determine whether there is writeback in progress | 69 | * writeback_in_progress - determine whether there is writeback in progress |
58 | * @bdi: the device's backing_dev_info structure. | 70 | * @bdi: the device's backing_dev_info structure. |
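As a rough illustration of the worst case described in the comment above: an inode whose timestamp update lands just after an expiry pass can wait up to one full dirtytime_expire_interval before it is treated as expired, and up to one more interval before the periodic wakeup further down flushes it, i.e. roughly twice the interval. A minimal standalone sketch of that arithmetic (not part of the patch):

#include <stdio.h>

int main(void)
{
	unsigned int dirtytime_expire_interval = 12 * 60 * 60;	/* default, in seconds */
	unsigned int worst_case = 2 * dirtytime_expire_interval;	/* two intervals */

	printf("interval: %u hours, worst-case timestamp age: %u hours\n",
	       dirtytime_expire_interval / 3600, worst_case / 3600);
	return 0;
}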
@@ -275,8 +287,8 @@ static int move_expired_inodes(struct list_head *delaying_queue, | |||
275 | 287 | ||
276 | if ((flags & EXPIRE_DIRTY_ATIME) == 0) | 288 | if ((flags & EXPIRE_DIRTY_ATIME) == 0) |
277 | older_than_this = work->older_than_this; | 289 | older_than_this = work->older_than_this; |
278 | else if ((work->reason == WB_REASON_SYNC) == 0) { | 290 | else if (!work->for_sync) { |
279 | expire_time = jiffies - (HZ * 86400); | 291 | expire_time = jiffies - (dirtytime_expire_interval * HZ); |
280 | older_than_this = &expire_time; | 292 | older_than_this = &expire_time; |
281 | } | 293 | } |
282 | while (!list_empty(delaying_queue)) { | 294 | while (!list_empty(delaying_queue)) { |
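For context, the cutoff computed in this hunk is compared against each inode's dirtied_when stamp while draining b_dirty_time (the real code does this via inode_dirtied_after()). A minimal userspace sketch of that test, with an assumed HZ value and simplified types:

#include <stdbool.h>
#include <stdio.h>

#define HZ 250	/* assumed tick rate, for the sketch only */

/* wrap-safe "a is later than b", modeled on the kernel's time_after() */
static bool time_after_tick(unsigned long a, unsigned long b)
{
	return (long)(b - a) < 0;
}

/* an inode parked on b_dirty_time counts as expired once dirtied_when is
 * no newer than "now - dirtytime_expire_interval * HZ" */
static bool dirtytime_expired(unsigned long dirtied_when, unsigned long now,
			      unsigned int interval_secs)
{
	unsigned long expire_time = now - (unsigned long)interval_secs * HZ;

	return !time_after_tick(dirtied_when, expire_time);
}

int main(void)
{
	unsigned long now = 1000000UL * HZ;
	unsigned int interval = 12 * 60 * 60;

	printf("dirtied 13h ago -> expired: %d\n",
	       dirtytime_expired(now - 13UL * 3600 * HZ, now, interval));
	printf("dirtied  1h ago -> expired: %d\n",
	       dirtytime_expired(now - 1UL * 3600 * HZ, now, interval));
	return 0;
}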
@@ -458,6 +470,7 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb, | |||
458 | */ | 470 | */ |
459 | redirty_tail(inode, wb); | 471 | redirty_tail(inode, wb); |
460 | } else if (inode->i_state & I_DIRTY_TIME) { | 472 | } else if (inode->i_state & I_DIRTY_TIME) { |
473 | inode->dirtied_when = jiffies; | ||
461 | list_move(&inode->i_wb_list, &wb->b_dirty_time); | 474 | list_move(&inode->i_wb_list, &wb->b_dirty_time); |
462 | } else { | 475 | } else { |
463 | /* The inode is clean. Remove from writeback lists. */ | 476 | /* The inode is clean. Remove from writeback lists. */ |
@@ -505,12 +518,17 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
505 | spin_lock(&inode->i_lock); | 518 | spin_lock(&inode->i_lock); |
506 | 519 | ||
507 | dirty = inode->i_state & I_DIRTY; | 520 | dirty = inode->i_state & I_DIRTY; |
508 | if (((dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) && | 521 | if (inode->i_state & I_DIRTY_TIME) { |
509 | (inode->i_state & I_DIRTY_TIME)) || | 522 | if ((dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) || |
510 | (inode->i_state & I_DIRTY_TIME_EXPIRED)) { | 523 | unlikely(inode->i_state & I_DIRTY_TIME_EXPIRED) || |
511 | dirty |= I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED; | 524 | unlikely(time_after(jiffies, |
512 | trace_writeback_lazytime(inode); | 525 | (inode->dirtied_time_when + |
513 | } | 526 | dirtytime_expire_interval * HZ)))) { |
527 | dirty |= I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED; | ||
528 | trace_writeback_lazytime(inode); | ||
529 | } | ||
530 | } else | ||
531 | inode->i_state &= ~I_DIRTY_TIME_EXPIRED; | ||
514 | inode->i_state &= ~dirty; | 532 | inode->i_state &= ~dirty; |
515 | 533 | ||
516 | /* | 534 | /* |
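Putting the new nested condition above into one place: a lazily dirtied timestamp (I_DIRTY_TIME) is promoted to a real timestamp writeback when the inode is also sync-dirty, was already flagged I_DIRTY_TIME_EXPIRED, or has been pending longer than dirtytime_expire_interval. A compile-and-run sketch using simplified flag values (illustration only, not the kernel's definitions):

#include <stdbool.h>
#include <stdio.h>

#define HZ 250			/* assumed tick rate, sketch only */
#define I_DIRTY_SYNC		0x01	/* stand-in flag values, not the */
#define I_DIRTY_DATASYNC	0x02	/* kernel's actual bit layout    */
#define I_DIRTY_TIME		0x04
#define I_DIRTY_TIME_EXPIRED	0x08

static bool time_after_tick(unsigned long a, unsigned long b)
{
	return (long)(b - a) < 0;
}

/* mirrors the decision made in __writeback_single_inode() above */
static bool must_write_timestamps(unsigned int i_state, unsigned long now,
				  unsigned long dirtied_time_when,
				  unsigned int interval_secs)
{
	if (!(i_state & I_DIRTY_TIME))
		return false;
	return (i_state & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) ||
	       (i_state & I_DIRTY_TIME_EXPIRED) ||
	       time_after_tick(now, dirtied_time_when +
				    (unsigned long)interval_secs * HZ);
}

int main(void)
{
	unsigned long now = 1000000UL * HZ;

	/* only I_DIRTY_TIME set, timestamp dirtied 13 hours ago */
	printf("aged lazytime inode needs write: %d\n",
	       must_write_timestamps(I_DIRTY_TIME, now,
				     now - 13UL * 3600 * HZ, 12 * 60 * 60));
	return 0;
}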
@@ -1131,6 +1149,56 @@ void wakeup_flusher_threads(long nr_pages, enum wb_reason reason) | |||
1131 | rcu_read_unlock(); | 1149 | rcu_read_unlock(); |
1132 | } | 1150 | } |
1133 | 1151 | ||
1152 | /* | ||
1153 | * Wake up bdi's periodically to make sure dirtytime inodes gets | ||
1154 | * written back periodically. We deliberately do *not* check the | ||
1155 | * b_dirtytime list in wb_has_dirty_io(), since this would cause the | ||
1156 | * kernel to be constantly waking up once there are any dirtytime | ||
1157 | * inodes on the system. So instead we define a separate delayed work | ||
1158 | * function which gets called much more rarely. (By default, only | ||
1159 | * once every 12 hours.) | ||
1160 | * | ||
1161 | * If there is any other write activity going on in the file system, | ||
1162 | * this function won't be necessary. But if the only thing that has | ||
1163 | * happened on the file system is a dirtytime inode caused by an atime | ||
1164 | * update, we need this infrastructure below to make sure that inode | ||
1165 | * eventually gets pushed out to disk. | ||
1166 | */ | ||
1167 | static void wakeup_dirtytime_writeback(struct work_struct *w); | ||
1168 | static DECLARE_DELAYED_WORK(dirtytime_work, wakeup_dirtytime_writeback); | ||
1169 | |||
1170 | static void wakeup_dirtytime_writeback(struct work_struct *w) | ||
1171 | { | ||
1172 | struct backing_dev_info *bdi; | ||
1173 | |||
1174 | rcu_read_lock(); | ||
1175 | list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) { | ||
1176 | if (list_empty(&bdi->wb.b_dirty_time)) | ||
1177 | continue; | ||
1178 | bdi_wakeup_thread(bdi); | ||
1179 | } | ||
1180 | rcu_read_unlock(); | ||
1181 | schedule_delayed_work(&dirtytime_work, dirtytime_expire_interval * HZ); | ||
1182 | } | ||
1183 | |||
1184 | static int __init start_dirtytime_writeback(void) | ||
1185 | { | ||
1186 | schedule_delayed_work(&dirtytime_work, dirtytime_expire_interval * HZ); | ||
1187 | return 0; | ||
1188 | } | ||
1189 | __initcall(start_dirtytime_writeback); | ||
1190 | |||
1191 | int dirtytime_interval_handler(struct ctl_table *table, int write, | ||
1192 | void __user *buffer, size_t *lenp, loff_t *ppos) | ||
1193 | { | ||
1194 | int ret; | ||
1195 | |||
1196 | ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); | ||
1197 | if (ret == 0 && write) | ||
1198 | mod_delayed_work(system_wq, &dirtytime_work, 0); | ||
1199 | return ret; | ||
1200 | } | ||
1201 | |||
1134 | static noinline void block_dump___mark_inode_dirty(struct inode *inode) | 1202 | static noinline void block_dump___mark_inode_dirty(struct inode *inode) |
1135 | { | 1203 | { |
1136 | if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) { | 1204 | if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) { |
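Usage note: dirtytime_expire_interval is exposed through a sysctl handled by dirtytime_interval_handler() above (the sysctl table entry itself lives outside this file; the path is assumed here to be /proc/sys/vm/dirtytime_expire_seconds). Writing a new value calls mod_delayed_work(..., 0), so dirtytime_work runs promptly and then re-arms itself with the new interval. A small illustrative snippet for tuning it from userspace:

#include <stdio.h>

int main(void)
{
	/* assumed sysctl path; the sysctl table change is not in this diff */
	const char *path = "/proc/sys/vm/dirtytime_expire_seconds";
	FILE *f = fopen(path, "w");

	if (!f) {
		perror(path);
		return 1;
	}
	/* e.g. flush lazytime timestamps roughly every hour instead of 12h */
	fprintf(f, "%u\n", 3600);
	fclose(f);
	return 0;
}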
@@ -1269,8 +1337,13 @@ void __mark_inode_dirty(struct inode *inode, int flags) | |||
1269 | } | 1337 | } |
1270 | 1338 | ||
1271 | inode->dirtied_when = jiffies; | 1339 | inode->dirtied_when = jiffies; |
1272 | list_move(&inode->i_wb_list, dirtytime ? | 1340 | if (dirtytime) |
1273 | &bdi->wb.b_dirty_time : &bdi->wb.b_dirty); | 1341 | inode->dirtied_time_when = jiffies; |
1342 | if (inode->i_state & (I_DIRTY_INODE | I_DIRTY_PAGES)) | ||
1343 | list_move(&inode->i_wb_list, &bdi->wb.b_dirty); | ||
1344 | else | ||
1345 | list_move(&inode->i_wb_list, | ||
1346 | &bdi->wb.b_dirty_time); | ||
1274 | spin_unlock(&bdi->wb.list_lock); | 1347 | spin_unlock(&bdi->wb.list_lock); |
1275 | trace_writeback_dirty_inode_enqueue(inode); | 1348 | trace_writeback_dirty_inode_enqueue(inode); |
1276 | 1349 | ||