-rw-r--r--	fs/fs-writeback.c		| 93
-rw-r--r--	include/linux/fs.h		|  1
-rw-r--r--	include/linux/writeback.h	|  3
-rw-r--r--	kernel/sysctl.c			|  8
4 files changed, 95 insertions(+), 10 deletions(-)
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index e907052eeadb..32a8bbd7a9ad 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -53,6 +53,18 @@ struct wb_writeback_work {
 	struct completion *done;	/* set if the caller waits */
 };
 
+/*
+ * If an inode is constantly having its pages dirtied, but then the
+ * updates stop dirtytime_expire_interval seconds in the past, it's
+ * possible for the worst case time between when an inode has its
+ * timestamps updated and when they finally get written out to be two
+ * dirtytime_expire_intervals.  We set the default to 12 hours (in
+ * seconds), which means most of the time inodes will have their
+ * timestamps written to disk after 12 hours, but in the worst case a
+ * few inodes might not have their timestamps updated for 24 hours.
+ */
+unsigned int dirtytime_expire_interval = 12 * 60 * 60;
+
 /**
  * writeback_in_progress - determine whether there is writeback in progress
  * @bdi: the device's backing_dev_info structure.
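
The worst-case bound in the comment above comes from two independent delays: an inode whose timestamps are dirtied just after an expiry check waits up to one full dirtytime_expire_interval before it counts as expired, and up to another interval before the next periodic wakeup pushes it out. A standalone userspace sketch (not part of the patch, purely illustrative) makes the arithmetic concrete:

	#include <stdio.h>

	int main(void)
	{
		unsigned int dirtytime_expire_interval = 12 * 60 * 60;	/* seconds */

		/* typical case: one interval; worst case: expiry + next wakeup */
		printf("typical write-out delay: %u hours\n",
		       dirtytime_expire_interval / 3600);
		printf("worst-case write-out delay: %u hours\n",
		       2 * dirtytime_expire_interval / 3600);
		return 0;
	}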
@@ -275,8 +287,8 @@ static int move_expired_inodes(struct list_head *delaying_queue,
 
 	if ((flags & EXPIRE_DIRTY_ATIME) == 0)
 		older_than_this = work->older_than_this;
-	else if ((work->reason == WB_REASON_SYNC) == 0) {
-		expire_time = jiffies - (HZ * 86400);
+	else if (!work->for_sync) {
+		expire_time = jiffies - (dirtytime_expire_interval * HZ);
 		older_than_this = &expire_time;
 	}
 	while (!list_empty(delaying_queue)) {
@@ -458,6 +470,7 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
 		 */
 		redirty_tail(inode, wb);
 	} else if (inode->i_state & I_DIRTY_TIME) {
+		inode->dirtied_when = jiffies;
 		list_move(&inode->i_wb_list, &wb->b_dirty_time);
 	} else {
 		/* The inode is clean. Remove from writeback lists. */
@@ -505,12 +518,17 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 	spin_lock(&inode->i_lock);
 
 	dirty = inode->i_state & I_DIRTY;
-	if (((dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) &&
-	     (inode->i_state & I_DIRTY_TIME)) ||
-	    (inode->i_state & I_DIRTY_TIME_EXPIRED)) {
-		dirty |= I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED;
-		trace_writeback_lazytime(inode);
-	}
+	if (inode->i_state & I_DIRTY_TIME) {
+		if ((dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) ||
+		    unlikely(inode->i_state & I_DIRTY_TIME_EXPIRED) ||
+		    unlikely(time_after(jiffies,
+					(inode->dirtied_time_when +
+					 dirtytime_expire_interval * HZ)))) {
+			dirty |= I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED;
+			trace_writeback_lazytime(inode);
+		}
+	} else
+		inode->i_state &= ~I_DIRTY_TIME_EXPIRED;
 	inode->i_state &= ~dirty;
 
 	/*
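
The expiry test added here leans on the kernel's wrap-safe jiffies comparison: time_after(a, b) is true when a is later than b even if the jiffies counter has wrapped, because it reduces to a signed subtraction. A rough userspace sketch (the HZ value and the my_time_after() helper are assumptions for illustration, not kernel code):

	#include <stdbool.h>
	#include <stdio.h>

	#define HZ 250	/* assumed tick rate, just for the example */

	/* mirrors the signed-subtraction trick behind time_after(a, b) */
	static bool my_time_after(unsigned long a, unsigned long b)
	{
		return (long)(b - a) < 0;
	}

	int main(void)
	{
		unsigned long jiffies = 1000000UL;	/* pretend current time */
		unsigned int dirtytime_expire_interval = 12 * 60 * 60;
		/* timestamps last dirtied 13 hours ago, one hour past the limit */
		unsigned long dirtied_time_when = jiffies - 13UL * 60 * 60 * HZ;

		if (my_time_after(jiffies, dirtied_time_when +
					   dirtytime_expire_interval * HZ))
			printf("I_DIRTY_TIME_EXPIRED: force the timestamps out\n");
		return 0;
	}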
@@ -1131,6 +1149,56 @@ void wakeup_flusher_threads(long nr_pages, enum wb_reason reason)
 	rcu_read_unlock();
 }
 
+/*
+ * Wake up bdi's periodically to make sure dirtytime inodes get
+ * written back periodically.  We deliberately do *not* check the
+ * b_dirtytime list in wb_has_dirty_io(), since this would cause the
+ * kernel to be constantly waking up once there are any dirtytime
+ * inodes on the system.  So instead we define a separate delayed work
+ * function which gets called much more rarely.  (By default, only
+ * once every 12 hours.)
+ *
+ * If there is any other write activity going on in the file system,
+ * this function won't be necessary.  But if the only thing that has
+ * happened on the file system is a dirtytime inode caused by an atime
+ * update, we need this infrastructure below to make sure that inode
+ * eventually gets pushed out to disk.
+ */
+static void wakeup_dirtytime_writeback(struct work_struct *w);
+static DECLARE_DELAYED_WORK(dirtytime_work, wakeup_dirtytime_writeback);
+
+static void wakeup_dirtytime_writeback(struct work_struct *w)
+{
+	struct backing_dev_info *bdi;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
+		if (list_empty(&bdi->wb.b_dirty_time))
+			continue;
+		bdi_wakeup_thread(bdi);
+	}
+	rcu_read_unlock();
+	schedule_delayed_work(&dirtytime_work, dirtytime_expire_interval * HZ);
+}
+
+static int __init start_dirtytime_writeback(void)
+{
+	schedule_delayed_work(&dirtytime_work, dirtytime_expire_interval * HZ);
+	return 0;
+}
+__initcall(start_dirtytime_writeback);
+
+int dirtytime_interval_handler(struct ctl_table *table, int write,
+			       void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	int ret;
+
+	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+	if (ret == 0 && write)
+		mod_delayed_work(system_wq, &dirtytime_work, 0);
+	return ret;
+}
+
 static noinline void block_dump___mark_inode_dirty(struct inode *inode)
 {
 	if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) {
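
The dirtytime_work machinery above is the standard self-rescheduling delayed-work pattern: DECLARE_DELAYED_WORK binds a handler to a work item, the handler re-arms itself at the end of each run, and a sysctl write can pull the next run forward with mod_delayed_work(..., 0). A minimal out-of-tree module sketch of the same shape (the tick_* names and the 10-second period are invented for illustration):

	#include <linux/module.h>
	#include <linux/workqueue.h>
	#include <linux/jiffies.h>

	static void tick_fn(struct work_struct *w);
	static DECLARE_DELAYED_WORK(tick_work, tick_fn);

	static void tick_fn(struct work_struct *w)
	{
		pr_info("periodic tick\n");
		/* re-arm, just as wakeup_dirtytime_writeback() reschedules itself */
		schedule_delayed_work(&tick_work, 10 * HZ);
	}

	static int __init tick_init(void)
	{
		schedule_delayed_work(&tick_work, 10 * HZ);
		return 0;
	}

	static void __exit tick_exit(void)
	{
		/* stop the re-arm cycle and wait out a running instance */
		cancel_delayed_work_sync(&tick_work);
	}

	module_init(tick_init);
	module_exit(tick_exit);
	MODULE_LICENSE("GPL");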
@@ -1269,8 +1337,13 @@ void __mark_inode_dirty(struct inode *inode, int flags)
 		}
 
 		inode->dirtied_when = jiffies;
-		list_move(&inode->i_wb_list, dirtytime ?
-			  &bdi->wb.b_dirty_time : &bdi->wb.b_dirty);
+		if (dirtytime)
+			inode->dirtied_time_when = jiffies;
+		if (inode->i_state & (I_DIRTY_INODE | I_DIRTY_PAGES))
+			list_move(&inode->i_wb_list, &bdi->wb.b_dirty);
+		else
+			list_move(&inode->i_wb_list,
+				  &bdi->wb.b_dirty_time);
 		spin_unlock(&bdi->wb.list_lock);
 		trace_writeback_dirty_inode_enqueue(inode);
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index b4d71b5e1ff2..f4131e8ead74 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -604,6 +604,7 @@ struct inode {
 	struct mutex		i_mutex;
 
 	unsigned long		dirtied_when;	/* jiffies of first dirtying */
+	unsigned long		dirtied_time_when;
 
 	struct hlist_node	i_hash;
 	struct list_head	i_wb_list;	/* backing dev IO list */
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 00048339c23e..b2dd371ec0ca 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -130,6 +130,7 @@ extern int vm_dirty_ratio;
 extern unsigned long vm_dirty_bytes;
 extern unsigned int dirty_writeback_interval;
 extern unsigned int dirty_expire_interval;
+extern unsigned int dirtytime_expire_interval;
 extern int vm_highmem_is_dirtyable;
 extern int block_dump;
 extern int laptop_mode;
@@ -146,6 +147,8 @@ extern int dirty_ratio_handler(struct ctl_table *table, int write,
 extern int dirty_bytes_handler(struct ctl_table *table, int write,
 			       void __user *buffer, size_t *lenp,
 			       loff_t *ppos);
+int dirtytime_interval_handler(struct ctl_table *table, int write,
+			       void __user *buffer, size_t *lenp, loff_t *ppos);
 
 struct ctl_table;
 int dirty_writeback_centisecs_handler(struct ctl_table *, int,
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 88ea2d6e0031..ce410bb9f2e1 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1228,6 +1228,14 @@ static struct ctl_table vm_table[] = {
 		.extra1		= &zero,
 	},
 	{
+		.procname	= "dirtytime_expire_seconds",
+		.data		= &dirtytime_expire_interval,
+		.maxlen		= sizeof(dirty_expire_interval),
+		.mode		= 0644,
+		.proc_handler	= dirtytime_interval_handler,
+		.extra1		= &zero,
+	},
+	{
 		.procname	= "nr_pdflush_threads",
 		.mode		= 0444 /* read-only */,
 		.proc_handler	= pdflush_proc_obsolete,
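
With this vm_table entry the knob is exposed as /proc/sys/vm/dirtytime_expire_seconds, and because dirtytime_interval_handler() calls mod_delayed_work(..., 0) on every successful write, a new interval takes effect immediately rather than after the old timer fires. A hedged userspace sketch of tuning it (the one-hour value is only an example):

	#include <stdio.h>

	int main(void)
	{
		FILE *f = fopen("/proc/sys/vm/dirtytime_expire_seconds", "w");

		if (!f) {
			perror("dirtytime_expire_seconds");
			return 1;
		}
		/* expire lazytime timestamps after one hour instead of twelve */
		fprintf(f, "%u\n", 60u * 60u);
		return fclose(f) ? 1 : 0;
	}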
