aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
author Theodore Ts'o <tytso@mit.edu> 2015-02-02 00:37:00 -0500
committer Al Viro <viro@zeniv.linux.org.uk> 2015-02-05 02:45:00 -0500
commit 0ae45f63d4ef8d8eeec49c7d8b44a1775fff13e8 (patch)
tree 660dbb014482092361eab263847fb906b5a9ec22 /mm
parent e36f014edff70fc02b3d3d79cead1d58f289332e (diff)
vfs: add support for a lazytime mount option
Add a new mount option which enables a new "lazytime" mode. This mode causes atime, mtime, and ctime updates to only be made to the in-memory version of the inode. The on-disk times will only get updated (a) when the inode needs to be updated for some non-time-related change, (b) when userspace calls fsync(), syncfs() or sync(), or (c) just before an undeleted inode is evicted from memory. This is OK according to POSIX because there are no guarantees after a crash unless userspace explicitly requests them via an fsync(2) call. For workloads which feature a large number of random writes to a preallocated file, the lazytime mount option significantly reduces writes to the inode table. The repeated 4k writes to a single block will result in undesirable stress on flash devices and SMR disk drives. Even on conventional HDDs, the repeated writes to the inode table block will trigger Adjacent Track Interference (ATI) remediation latencies, which very negatively impact long tail latencies --- which is a very big deal for web serving tiers (for example). Google-Bug-Id: 18297052 Signed-off-by: Theodore Ts'o <tytso@mit.edu> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Diffstat (limited to 'mm')
-rw-r--r--mm/backing-dev.c10
1 files changed, 8 insertions, 2 deletions
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 0ae0df55000b..915feea94c66 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -69,10 +69,10 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
69 unsigned long background_thresh; 69 unsigned long background_thresh;
70 unsigned long dirty_thresh; 70 unsigned long dirty_thresh;
71 unsigned long bdi_thresh; 71 unsigned long bdi_thresh;
72 unsigned long nr_dirty, nr_io, nr_more_io; 72 unsigned long nr_dirty, nr_io, nr_more_io, nr_dirty_time;
73 struct inode *inode; 73 struct inode *inode;
74 74
75 nr_dirty = nr_io = nr_more_io = 0; 75 nr_dirty = nr_io = nr_more_io = nr_dirty_time = 0;
76 spin_lock(&wb->list_lock); 76 spin_lock(&wb->list_lock);
77 list_for_each_entry(inode, &wb->b_dirty, i_wb_list) 77 list_for_each_entry(inode, &wb->b_dirty, i_wb_list)
78 nr_dirty++; 78 nr_dirty++;
@@ -80,6 +80,9 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
80 nr_io++; 80 nr_io++;
81 list_for_each_entry(inode, &wb->b_more_io, i_wb_list) 81 list_for_each_entry(inode, &wb->b_more_io, i_wb_list)
82 nr_more_io++; 82 nr_more_io++;
83 list_for_each_entry(inode, &wb->b_dirty_time, i_wb_list)
84 if (inode->i_state & I_DIRTY_TIME)
85 nr_dirty_time++;
83 spin_unlock(&wb->list_lock); 86 spin_unlock(&wb->list_lock);
84 87
85 global_dirty_limits(&background_thresh, &dirty_thresh); 88 global_dirty_limits(&background_thresh, &dirty_thresh);
@@ -98,6 +101,7 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
98 "b_dirty: %10lu\n" 101 "b_dirty: %10lu\n"
99 "b_io: %10lu\n" 102 "b_io: %10lu\n"
100 "b_more_io: %10lu\n" 103 "b_more_io: %10lu\n"
104 "b_dirty_time: %10lu\n"
101 "bdi_list: %10u\n" 105 "bdi_list: %10u\n"
102 "state: %10lx\n", 106 "state: %10lx\n",
103 (unsigned long) K(bdi_stat(bdi, BDI_WRITEBACK)), 107 (unsigned long) K(bdi_stat(bdi, BDI_WRITEBACK)),
@@ -111,6 +115,7 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
111 nr_dirty, 115 nr_dirty,
112 nr_io, 116 nr_io,
113 nr_more_io, 117 nr_more_io,
118 nr_dirty_time,
114 !list_empty(&bdi->bdi_list), bdi->state); 119 !list_empty(&bdi->bdi_list), bdi->state);
115#undef K 120#undef K
116 121
@@ -418,6 +423,7 @@ static void bdi_wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi)
418 INIT_LIST_HEAD(&wb->b_dirty); 423 INIT_LIST_HEAD(&wb->b_dirty);
419 INIT_LIST_HEAD(&wb->b_io); 424 INIT_LIST_HEAD(&wb->b_io);
420 INIT_LIST_HEAD(&wb->b_more_io); 425 INIT_LIST_HEAD(&wb->b_more_io);
426 INIT_LIST_HEAD(&wb->b_dirty_time);
421 spin_lock_init(&wb->list_lock); 427 spin_lock_init(&wb->list_lock);
422 INIT_DELAYED_WORK(&wb->dwork, bdi_writeback_workfn); 428 INIT_DELAYED_WORK(&wb->dwork, bdi_writeback_workfn);
423} 429}