Diffstat (limited to 'fs/ext4/fsync.c')
-rw-r--r--  fs/ext4/fsync.c | 142
1 file changed, 115 insertions(+), 27 deletions(-)
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 592adf2e546e..ce66d2fe826c 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -34,6 +34,89 @@
 
 #include <trace/events/ext4.h>
 
+static void dump_completed_IO(struct inode * inode)
+{
+#ifdef EXT4FS_DEBUG
+	struct list_head *cur, *before, *after;
+	ext4_io_end_t *io, *io0, *io1;
+	unsigned long flags;
+
+	if (list_empty(&EXT4_I(inode)->i_completed_io_list)){
+		ext4_debug("inode %lu completed_io list is empty\n", inode->i_ino);
+		return;
+	}
+
+	ext4_debug("Dump inode %lu completed_io list \n", inode->i_ino);
+	spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
+	list_for_each_entry(io, &EXT4_I(inode)->i_completed_io_list, list){
+		cur = &io->list;
+		before = cur->prev;
+		io0 = container_of(before, ext4_io_end_t, list);
+		after = cur->next;
+		io1 = container_of(after, ext4_io_end_t, list);
+
+		ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n",
+			   io, inode->i_ino, io0, io1);
+	}
+	spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags);
+#endif
+}
+
+/*
+ * This function is called from ext4_sync_file().
+ *
+ * When IO is completed, the work to convert unwritten extents to
+ * written is queued on a workqueue but may not get scheduled
+ * immediately. When fsync is called, we need to ensure the
+ * conversion is complete before fsync returns.
+ * The inode keeps track of a list of pending/completed IO that
+ * might need conversion. This function walks through the list
+ * and converts the related unwritten extents of completed IO
+ * to written.
+ * Returns 0 on success, or a negative error code on failure.
+ */
+extern int ext4_flush_completed_IO(struct inode *inode)
+{
+	ext4_io_end_t *io;
+	struct ext4_inode_info *ei = EXT4_I(inode);
+	unsigned long flags;
+	int ret = 0;
+	int ret2 = 0;
+
+	if (list_empty(&ei->i_completed_io_list))
+		return ret;
+
+	dump_completed_IO(inode);
+	spin_lock_irqsave(&ei->i_completed_io_lock, flags);
+	while (!list_empty(&ei->i_completed_io_list)){
+		io = list_entry(ei->i_completed_io_list.next,
+				ext4_io_end_t, list);
+		/*
+		 * Calling ext4_end_io_nolock() to convert completed
+		 * IO to written.
+		 *
+		 * When ext4_sync_file() is called, run_queue() may already
+		 * be about to flush the work for this io structure, and it
+		 * will be upset if it finds that the io structure it is
+		 * about to schedule has been freed.
+		 *
+		 * Thus the io structure must stay valid here even after
+		 * the conversion has finished. The structure carries a
+		 * flag to avoid double conversion by both fsync and the
+		 * background workqueue.
+		 */
+		spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
+		ret = ext4_end_io_nolock(io);
+		spin_lock_irqsave(&ei->i_completed_io_lock, flags);
+		if (ret < 0)
+			ret2 = ret;
+		else
+			list_del_init(&io->list);
+	}
+	spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
+	return (ret2 < 0) ? ret2 : 0;
+}
+
 /*
  * If we're not journaling and this is a just-created file, we have to
  * sync our parent directory (if it was freshly created) since
@@ -42,9 +125,11 @@
  * the parent directory's parent as well, and so on recursively, if
  * they are also freshly created.
  */
-static void ext4_sync_parent(struct inode *inode)
+static int ext4_sync_parent(struct inode *inode)
 {
+	struct writeback_control wbc;
 	struct dentry *dentry = NULL;
+	int ret = 0;
 
 	while (inode && ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) {
 		ext4_clear_inode_state(inode, EXT4_STATE_NEWENTRY);
@@ -53,8 +138,17 @@ static void ext4_sync_parent(struct inode *inode)
 		if (!dentry || !dentry->d_parent || !dentry->d_parent->d_inode)
 			break;
 		inode = dentry->d_parent->d_inode;
-		sync_mapping_buffers(inode->i_mapping);
+		ret = sync_mapping_buffers(inode->i_mapping);
+		if (ret)
+			break;
+		memset(&wbc, 0, sizeof(wbc));
+		wbc.sync_mode = WB_SYNC_ALL;
+		wbc.nr_to_write = 0;	/* only write out the inode */
+		ret = sync_inode(inode, &wbc);
+		if (ret)
+			break;
 	}
+	return ret;
 }
 
 /*
@@ -78,23 +172,24 @@ int ext4_sync_file(struct file *file, int datasync)
 	journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
 	int ret;
 	tid_t commit_tid;
+	bool needs_barrier = false;
 
 	J_ASSERT(ext4_journal_current_handle() == NULL);
 
-	trace_ext4_sync_file(file, datasync);
+	trace_ext4_sync_file_enter(file, datasync);
 
 	if (inode->i_sb->s_flags & MS_RDONLY)
 		return 0;
 
-	ret = flush_completed_IO(inode);
+	ret = ext4_flush_completed_IO(inode);
 	if (ret < 0)
-		return ret;
+		goto out;
 
 	if (!journal) {
 		ret = generic_file_fsync(file, datasync);
 		if (!ret && !list_empty(&inode->i_dentry))
-			ext4_sync_parent(inode);
-		return ret;
+			ret = ext4_sync_parent(inode);
+		goto out;
 	}
 
 	/*
@@ -111,27 +206,20 @@ int ext4_sync_file(struct file *file, int datasync)
 	 * (they were dirtied by commit). But that's OK - the blocks are
 	 * safe in-journal, which is all fsync() needs to ensure.
 	 */
-	if (ext4_should_journal_data(inode))
-		return ext4_force_commit(inode->i_sb);
+	if (ext4_should_journal_data(inode)) {
+		ret = ext4_force_commit(inode->i_sb);
+		goto out;
+	}
 
 	commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid;
-	if (jbd2_log_start_commit(journal, commit_tid)) {
-		/*
-		 * When the journal is on a different device than the
-		 * fs data disk, we need to issue the barrier in
-		 * writeback mode. (In ordered mode, the jbd2 layer
-		 * will take care of issuing the barrier. In
-		 * data=journal, all of the data blocks are written to
-		 * the journal device.)
-		 */
-		if (ext4_should_writeback_data(inode) &&
-		    (journal->j_fs_dev != journal->j_dev) &&
-		    (journal->j_flags & JBD2_BARRIER))
-			blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL,
-					   NULL, BLKDEV_IFL_WAIT);
-		ret = jbd2_log_wait_commit(journal, commit_tid);
-	} else if (journal->j_flags & JBD2_BARRIER)
-		blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL,
-				   BLKDEV_IFL_WAIT);
+	if (journal->j_flags & JBD2_BARRIER &&
+	    !jbd2_trans_will_send_data_barrier(journal, commit_tid))
+		needs_barrier = true;
+	jbd2_log_start_commit(journal, commit_tid);
+	ret = jbd2_log_wait_commit(journal, commit_tid);
+	if (needs_barrier)
+		blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
+out:
+	trace_ext4_sync_file_exit(inode, ret);
 	return ret;
 }
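
The drop-the-lock-while-processing pattern in ext4_flush_completed_IO() above is easy to miss in diff form. Below is a minimal single-consumer userspace sketch of the same idea (plain pthreads, not ext4 code; the item type and the process() helper are made up for illustration): the list lock is released around the per-item work and retaken before touching the list again, and an item is only unlinked once it has been processed successfully.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct item {
	struct item *next;
	int id;
};

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static struct item *head;

/* Stand-in for ext4_end_io_nolock(): per-item work done without
 * the list lock held. */
static int process(struct item *it)
{
	printf("converted item %d\n", it->id);
	return 0;
}

static int flush_completed(void)
{
	int ret, ret2 = 0;

	pthread_mutex_lock(&list_lock);
	while (head != NULL) {
		struct item *it = head;

		/* Drop the lock for the expensive work; the item
		 * itself must stay valid while unlocked. */
		pthread_mutex_unlock(&list_lock);
		ret = process(it);
		pthread_mutex_lock(&list_lock);

		if (ret < 0) {
			ret2 = ret;	/* remember the error */
		} else {
			head = it->next;	/* like list_del_init() */
			free(it);
		}
	}
	pthread_mutex_unlock(&list_lock);
	return ret2 < 0 ? ret2 : 0;
}

int main(void)
{
	for (int i = 1; i <= 3; i++) {
		struct item *it = malloc(sizeof(*it));

		it->id = i;
		it->next = head;
		head = it;
	}
	return flush_completed();
}

Note that, like the kernel loop it mirrors, this sketch retries a failing item rather than skipping it; the kernel code has the same shape.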
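The ext4_sync_parent() change gives fsync() on no-journal ext4 the directory-entry durability that careful applications otherwise arrange by hand. For comparison, a minimal userspace sketch of that manual pattern (assuming a writable directory named dir/ already exists; the file name is arbitrary):

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* Create and persist a new file's data and inode. */
	int fd = open("dir/newfile", O_WRONLY | O_CREAT | O_TRUNC, 0644);
	if (fd < 0) { perror("open"); return 1; }
	if (write(fd, "data\n", 5) != 5) { perror("write"); return 1; }
	if (fsync(fd) != 0) { perror("fsync file"); return 1; }
	close(fd);

	/* Persist the directory entry pointing at the new file:
	 * the step ext4_sync_parent() now performs in-kernel for
	 * freshly created files in no-journal mode. */
	int dfd = open("dir", O_RDONLY | O_DIRECTORY);
	if (dfd < 0) { perror("open dir"); return 1; }
	if (fsync(dfd) != 0) { perror("fsync dir"); return 1; }
	close(dfd);
	return 0;
}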