diff options
author | Jan Kara <jack@suse.cz> | 2014-02-21 05:19:04 -0500 |
---|---|---|
committer | Jan Kara <jack@suse.cz> | 2014-02-21 20:02:28 -0500 |
commit | 0dc83bd30b0bf5410c0933cfbbf8853248eff0a9 (patch) | |
tree | 66b8f3ebdd4a6c0de6f82e449c7e037170e26b81 | |
parent | 1362f4ea20fa63688ba6026e586d9746ff13a846 (diff) |
Revert "writeback: do not sync data dirtied after sync start"
This reverts commit c4a391b53a72d2df4ee97f96f78c1d5971b47489. Dave
Chinner <david@fromorbit.com> has reported the commit may cause some
inodes to be left out from sync(2). This is because we can call
redirty_tail() for some inode (which sets i_dirtied_when to current time)
after sync(2) has started or similarly requeue_inode() can set
i_dirtied_when to current time if writeback had to skip some pages. The
real problem is in the functions clobbering i_dirtied_when but fixing
that isn't trivial so revert is a safer choice for now.
CC: stable@vger.kernel.org # >= 3.13
Signed-off-by: Jan Kara <jack@suse.cz>
-rw-r--r-- | fs/fs-writeback.c | 33 | ||||
-rw-r--r-- | fs/sync.c | 15 | ||||
-rw-r--r-- | fs/xfs/xfs_super.c | 2 | ||||
-rw-r--r-- | include/linux/writeback.h | 2 | ||||
-rw-r--r-- | include/trace/events/writeback.h | 6 |
5 files changed, 22 insertions, 36 deletions
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index e0259a163f98..d754e3cf99a8 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
@@ -40,18 +40,13 @@ | |||
40 | struct wb_writeback_work { | 40 | struct wb_writeback_work { |
41 | long nr_pages; | 41 | long nr_pages; |
42 | struct super_block *sb; | 42 | struct super_block *sb; |
43 | /* | 43 | unsigned long *older_than_this; |
44 | * Write only inodes dirtied before this time. Don't forget to set | ||
45 | * older_than_this_is_set when you set this. | ||
46 | */ | ||
47 | unsigned long older_than_this; | ||
48 | enum writeback_sync_modes sync_mode; | 44 | enum writeback_sync_modes sync_mode; |
49 | unsigned int tagged_writepages:1; | 45 | unsigned int tagged_writepages:1; |
50 | unsigned int for_kupdate:1; | 46 | unsigned int for_kupdate:1; |
51 | unsigned int range_cyclic:1; | 47 | unsigned int range_cyclic:1; |
52 | unsigned int for_background:1; | 48 | unsigned int for_background:1; |
53 | unsigned int for_sync:1; /* sync(2) WB_SYNC_ALL writeback */ | 49 | unsigned int for_sync:1; /* sync(2) WB_SYNC_ALL writeback */ |
54 | unsigned int older_than_this_is_set:1; | ||
55 | enum wb_reason reason; /* why was writeback initiated? */ | 50 | enum wb_reason reason; /* why was writeback initiated? */ |
56 | 51 | ||
57 | struct list_head list; /* pending work list */ | 52 | struct list_head list; /* pending work list */ |
@@ -252,10 +247,10 @@ static int move_expired_inodes(struct list_head *delaying_queue, | |||
252 | int do_sb_sort = 0; | 247 | int do_sb_sort = 0; |
253 | int moved = 0; | 248 | int moved = 0; |
254 | 249 | ||
255 | WARN_ON_ONCE(!work->older_than_this_is_set); | ||
256 | while (!list_empty(delaying_queue)) { | 250 | while (!list_empty(delaying_queue)) { |
257 | inode = wb_inode(delaying_queue->prev); | 251 | inode = wb_inode(delaying_queue->prev); |
258 | if (inode_dirtied_after(inode, work->older_than_this)) | 252 | if (work->older_than_this && |
253 | inode_dirtied_after(inode, *work->older_than_this)) | ||
259 | break; | 254 | break; |
260 | list_move(&inode->i_wb_list, &tmp); | 255 | list_move(&inode->i_wb_list, &tmp); |
261 | moved++; | 256 | moved++; |
@@ -742,8 +737,6 @@ static long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages, | |||
742 | .sync_mode = WB_SYNC_NONE, | 737 | .sync_mode = WB_SYNC_NONE, |
743 | .range_cyclic = 1, | 738 | .range_cyclic = 1, |
744 | .reason = reason, | 739 | .reason = reason, |
745 | .older_than_this = jiffies, | ||
746 | .older_than_this_is_set = 1, | ||
747 | }; | 740 | }; |
748 | 741 | ||
749 | spin_lock(&wb->list_lock); | 742 | spin_lock(&wb->list_lock); |
@@ -802,13 +795,12 @@ static long wb_writeback(struct bdi_writeback *wb, | |||
802 | { | 795 | { |
803 | unsigned long wb_start = jiffies; | 796 | unsigned long wb_start = jiffies; |
804 | long nr_pages = work->nr_pages; | 797 | long nr_pages = work->nr_pages; |
798 | unsigned long oldest_jif; | ||
805 | struct inode *inode; | 799 | struct inode *inode; |
806 | long progress; | 800 | long progress; |
807 | 801 | ||
808 | if (!work->older_than_this_is_set) { | 802 | oldest_jif = jiffies; |
809 | work->older_than_this = jiffies; | 803 | work->older_than_this = &oldest_jif; |
810 | work->older_than_this_is_set = 1; | ||
811 | } | ||
812 | 804 | ||
813 | spin_lock(&wb->list_lock); | 805 | spin_lock(&wb->list_lock); |
814 | for (;;) { | 806 | for (;;) { |
@@ -842,10 +834,10 @@ static long wb_writeback(struct bdi_writeback *wb, | |||
842 | * safe. | 834 | * safe. |
843 | */ | 835 | */ |
844 | if (work->for_kupdate) { | 836 | if (work->for_kupdate) { |
845 | work->older_than_this = jiffies - | 837 | oldest_jif = jiffies - |
846 | msecs_to_jiffies(dirty_expire_interval * 10); | 838 | msecs_to_jiffies(dirty_expire_interval * 10); |
847 | } else if (work->for_background) | 839 | } else if (work->for_background) |
848 | work->older_than_this = jiffies; | 840 | oldest_jif = jiffies; |
849 | 841 | ||
850 | trace_writeback_start(wb->bdi, work); | 842 | trace_writeback_start(wb->bdi, work); |
851 | if (list_empty(&wb->b_io)) | 843 | if (list_empty(&wb->b_io)) |
@@ -1357,21 +1349,18 @@ EXPORT_SYMBOL(try_to_writeback_inodes_sb); | |||
1357 | 1349 | ||
1358 | /** | 1350 | /** |
1359 | * sync_inodes_sb - sync sb inode pages | 1351 | * sync_inodes_sb - sync sb inode pages |
1360 | * @sb: the superblock | 1352 | * @sb: the superblock |
1361 | * @older_than_this: timestamp | ||
1362 | * | 1353 | * |
1363 | * This function writes and waits on any dirty inode belonging to this | 1354 | * This function writes and waits on any dirty inode belonging to this |
1364 | * superblock that has been dirtied before given timestamp. | 1355 | * super_block. |
1365 | */ | 1356 | */ |
1366 | void sync_inodes_sb(struct super_block *sb, unsigned long older_than_this) | 1357 | void sync_inodes_sb(struct super_block *sb) |
1367 | { | 1358 | { |
1368 | DECLARE_COMPLETION_ONSTACK(done); | 1359 | DECLARE_COMPLETION_ONSTACK(done); |
1369 | struct wb_writeback_work work = { | 1360 | struct wb_writeback_work work = { |
1370 | .sb = sb, | 1361 | .sb = sb, |
1371 | .sync_mode = WB_SYNC_ALL, | 1362 | .sync_mode = WB_SYNC_ALL, |
1372 | .nr_pages = LONG_MAX, | 1363 | .nr_pages = LONG_MAX, |
1373 | .older_than_this = older_than_this, | ||
1374 | .older_than_this_is_set = 1, | ||
1375 | .range_cyclic = 0, | 1364 | .range_cyclic = 0, |
1376 | .done = &done, | 1365 | .done = &done, |
1377 | .reason = WB_REASON_SYNC, | 1366 | .reason = WB_REASON_SYNC, |
@@ -27,11 +27,10 @@ | |||
27 | * wait == 1 case since in that case write_inode() functions do | 27 | * wait == 1 case since in that case write_inode() functions do |
28 | * sync_dirty_buffer() and thus effectively write one block at a time. | 28 | * sync_dirty_buffer() and thus effectively write one block at a time. |
29 | */ | 29 | */ |
30 | static int __sync_filesystem(struct super_block *sb, int wait, | 30 | static int __sync_filesystem(struct super_block *sb, int wait) |
31 | unsigned long start) | ||
32 | { | 31 | { |
33 | if (wait) | 32 | if (wait) |
34 | sync_inodes_sb(sb, start); | 33 | sync_inodes_sb(sb); |
35 | else | 34 | else |
36 | writeback_inodes_sb(sb, WB_REASON_SYNC); | 35 | writeback_inodes_sb(sb, WB_REASON_SYNC); |
37 | 36 | ||
@@ -48,7 +47,6 @@ static int __sync_filesystem(struct super_block *sb, int wait, | |||
48 | int sync_filesystem(struct super_block *sb) | 47 | int sync_filesystem(struct super_block *sb) |
49 | { | 48 | { |
50 | int ret; | 49 | int ret; |
51 | unsigned long start = jiffies; | ||
52 | 50 | ||
53 | /* | 51 | /* |
54 | * We need to be protected against the filesystem going from | 52 | * We need to be protected against the filesystem going from |
@@ -62,17 +60,17 @@ int sync_filesystem(struct super_block *sb) | |||
62 | if (sb->s_flags & MS_RDONLY) | 60 | if (sb->s_flags & MS_RDONLY) |
63 | return 0; | 61 | return 0; |
64 | 62 | ||
65 | ret = __sync_filesystem(sb, 0, start); | 63 | ret = __sync_filesystem(sb, 0); |
66 | if (ret < 0) | 64 | if (ret < 0) |
67 | return ret; | 65 | return ret; |
68 | return __sync_filesystem(sb, 1, start); | 66 | return __sync_filesystem(sb, 1); |
69 | } | 67 | } |
70 | EXPORT_SYMBOL_GPL(sync_filesystem); | 68 | EXPORT_SYMBOL_GPL(sync_filesystem); |
71 | 69 | ||
72 | static void sync_inodes_one_sb(struct super_block *sb, void *arg) | 70 | static void sync_inodes_one_sb(struct super_block *sb, void *arg) |
73 | { | 71 | { |
74 | if (!(sb->s_flags & MS_RDONLY)) | 72 | if (!(sb->s_flags & MS_RDONLY)) |
75 | sync_inodes_sb(sb, *((unsigned long *)arg)); | 73 | sync_inodes_sb(sb); |
76 | } | 74 | } |
77 | 75 | ||
78 | static void sync_fs_one_sb(struct super_block *sb, void *arg) | 76 | static void sync_fs_one_sb(struct super_block *sb, void *arg) |
@@ -104,10 +102,9 @@ static void fdatawait_one_bdev(struct block_device *bdev, void *arg) | |||
104 | SYSCALL_DEFINE0(sync) | 102 | SYSCALL_DEFINE0(sync) |
105 | { | 103 | { |
106 | int nowait = 0, wait = 1; | 104 | int nowait = 0, wait = 1; |
107 | unsigned long start = jiffies; | ||
108 | 105 | ||
109 | wakeup_flusher_threads(0, WB_REASON_SYNC); | 106 | wakeup_flusher_threads(0, WB_REASON_SYNC); |
110 | iterate_supers(sync_inodes_one_sb, &start); | 107 | iterate_supers(sync_inodes_one_sb, NULL); |
111 | iterate_supers(sync_fs_one_sb, &nowait); | 108 | iterate_supers(sync_fs_one_sb, &nowait); |
112 | iterate_supers(sync_fs_one_sb, &wait); | 109 | iterate_supers(sync_fs_one_sb, &wait); |
113 | iterate_bdevs(fdatawrite_one_bdev, NULL); | 110 | iterate_bdevs(fdatawrite_one_bdev, NULL); |
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index f317488263dd..d971f4932b5d 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c | |||
@@ -913,7 +913,7 @@ xfs_flush_inodes( | |||
913 | struct super_block *sb = mp->m_super; | 913 | struct super_block *sb = mp->m_super; |
914 | 914 | ||
915 | if (down_read_trylock(&sb->s_umount)) { | 915 | if (down_read_trylock(&sb->s_umount)) { |
916 | sync_inodes_sb(sb, jiffies); | 916 | sync_inodes_sb(sb); |
917 | up_read(&sb->s_umount); | 917 | up_read(&sb->s_umount); |
918 | } | 918 | } |
919 | } | 919 | } |
diff --git a/include/linux/writeback.h b/include/linux/writeback.h index fc0e4320aa6d..021b8a319b9e 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h | |||
@@ -97,7 +97,7 @@ void writeback_inodes_sb_nr(struct super_block *, unsigned long nr, | |||
97 | int try_to_writeback_inodes_sb(struct super_block *, enum wb_reason reason); | 97 | int try_to_writeback_inodes_sb(struct super_block *, enum wb_reason reason); |
98 | int try_to_writeback_inodes_sb_nr(struct super_block *, unsigned long nr, | 98 | int try_to_writeback_inodes_sb_nr(struct super_block *, unsigned long nr, |
99 | enum wb_reason reason); | 99 | enum wb_reason reason); |
100 | void sync_inodes_sb(struct super_block *sb, unsigned long older_than_this); | 100 | void sync_inodes_sb(struct super_block *); |
101 | void wakeup_flusher_threads(long nr_pages, enum wb_reason reason); | 101 | void wakeup_flusher_threads(long nr_pages, enum wb_reason reason); |
102 | void inode_wait_for_writeback(struct inode *inode); | 102 | void inode_wait_for_writeback(struct inode *inode); |
103 | 103 | ||
diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index c7bbbe794e65..464ea82e10db 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h | |||
@@ -287,11 +287,11 @@ TRACE_EVENT(writeback_queue_io, | |||
287 | __field(int, reason) | 287 | __field(int, reason) |
288 | ), | 288 | ), |
289 | TP_fast_assign( | 289 | TP_fast_assign( |
290 | unsigned long older_than_this = work->older_than_this; | 290 | unsigned long *older_than_this = work->older_than_this; |
291 | strncpy(__entry->name, dev_name(wb->bdi->dev), 32); | 291 | strncpy(__entry->name, dev_name(wb->bdi->dev), 32); |
292 | __entry->older = older_than_this; | 292 | __entry->older = older_than_this ? *older_than_this : 0; |
293 | __entry->age = older_than_this ? | 293 | __entry->age = older_than_this ? |
294 | (jiffies - older_than_this) * 1000 / HZ : -1; | 294 | (jiffies - *older_than_this) * 1000 / HZ : -1; |
295 | __entry->moved = moved; | 295 | __entry->moved = moved; |
296 | __entry->reason = work->reason; | 296 | __entry->reason = work->reason; |
297 | ), | 297 | ), |