diff options
Diffstat (limited to 'fs/jbd2/commit.c')
| -rw-r--r-- | fs/jbd2/commit.c | 336 |
1 files changed, 138 insertions, 198 deletions
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index a2ed72f7ceee..0abe02c4242a 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c | |||
| @@ -16,12 +16,15 @@ | |||
| 16 | #include <linux/time.h> | 16 | #include <linux/time.h> |
| 17 | #include <linux/fs.h> | 17 | #include <linux/fs.h> |
| 18 | #include <linux/jbd2.h> | 18 | #include <linux/jbd2.h> |
| 19 | #include <linux/marker.h> | ||
| 19 | #include <linux/errno.h> | 20 | #include <linux/errno.h> |
| 20 | #include <linux/slab.h> | 21 | #include <linux/slab.h> |
| 21 | #include <linux/mm.h> | 22 | #include <linux/mm.h> |
| 22 | #include <linux/pagemap.h> | 23 | #include <linux/pagemap.h> |
| 23 | #include <linux/jiffies.h> | 24 | #include <linux/jiffies.h> |
| 24 | #include <linux/crc32.h> | 25 | #include <linux/crc32.h> |
| 26 | #include <linux/writeback.h> | ||
| 27 | #include <linux/backing-dev.h> | ||
| 25 | 28 | ||
| 26 | /* | 29 | /* |
| 27 | * Default IO end handler for temporary BJ_IO buffer_heads. | 30 | * Default IO end handler for temporary BJ_IO buffer_heads. |
| @@ -37,8 +40,8 @@ static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate) | |||
| 37 | } | 40 | } |
| 38 | 41 | ||
| 39 | /* | 42 | /* |
| 40 | * When an ext3-ordered file is truncated, it is possible that many pages are | 43 | * When an ext4 file is truncated, it is possible that some pages are not |
| 41 | * not sucessfully freed, because they are attached to a committing transaction. | 44 | * successfully freed, because they are attached to a committing transaction. |
| 42 | * After the transaction commits, these pages are left on the LRU, with no | 45 | * After the transaction commits, these pages are left on the LRU, with no |
| 43 | * ->mapping, and with attached buffers. These pages are trivially reclaimable | 46 | * ->mapping, and with attached buffers. These pages are trivially reclaimable |
| 44 | * by the VM, but their apparent absence upsets the VM accounting, and it makes | 47 | * by the VM, but their apparent absence upsets the VM accounting, and it makes |
| @@ -65,7 +68,7 @@ static void release_buffer_page(struct buffer_head *bh) | |||
| 65 | goto nope; | 68 | goto nope; |
| 66 | 69 | ||
| 67 | /* OK, it's a truncated page */ | 70 | /* OK, it's a truncated page */ |
| 68 | if (TestSetPageLocked(page)) | 71 | if (!trylock_page(page)) |
| 69 | goto nope; | 72 | goto nope; |
| 70 | 73 | ||
| 71 | page_cache_get(page); | 74 | page_cache_get(page); |
| @@ -80,21 +83,6 @@ nope: | |||
| 80 | } | 83 | } |
| 81 | 84 | ||
| 82 | /* | 85 | /* |
| 83 | * Try to acquire jbd_lock_bh_state() against the buffer, when j_list_lock is | ||
| 84 | * held. For ranking reasons we must trylock. If we lose, schedule away and | ||
| 85 | * return 0. j_list_lock is dropped in this case. | ||
| 86 | */ | ||
| 87 | static int inverted_lock(journal_t *journal, struct buffer_head *bh) | ||
| 88 | { | ||
| 89 | if (!jbd_trylock_bh_state(bh)) { | ||
| 90 | spin_unlock(&journal->j_list_lock); | ||
| 91 | schedule(); | ||
| 92 | return 0; | ||
| 93 | } | ||
| 94 | return 1; | ||
| 95 | } | ||
| 96 | |||
| 97 | /* | ||
| 98 | * Done it all: now submit the commit record. We should have | 86 | * Done it all: now submit the commit record. We should have |
| 99 | * cleaned up our previous buffers by now, so if we are in abort | 87 | * cleaned up our previous buffers by now, so if we are in abort |
| 100 | * mode we can now just skip the rest of the journal write | 88 | * mode we can now just skip the rest of the journal write |
| @@ -112,6 +100,7 @@ static int journal_submit_commit_record(journal_t *journal, | |||
| 112 | struct buffer_head *bh; | 100 | struct buffer_head *bh; |
| 113 | int ret; | 101 | int ret; |
| 114 | int barrier_done = 0; | 102 | int barrier_done = 0; |
| 103 | struct timespec now = current_kernel_time(); | ||
| 115 | 104 | ||
| 116 | if (is_journal_aborted(journal)) | 105 | if (is_journal_aborted(journal)) |
| 117 | return 0; | 106 | return 0; |
| @@ -126,6 +115,8 @@ static int journal_submit_commit_record(journal_t *journal, | |||
| 126 | tmp->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); | 115 | tmp->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); |
| 127 | tmp->h_blocktype = cpu_to_be32(JBD2_COMMIT_BLOCK); | 116 | tmp->h_blocktype = cpu_to_be32(JBD2_COMMIT_BLOCK); |
| 128 | tmp->h_sequence = cpu_to_be32(commit_transaction->t_tid); | 117 | tmp->h_sequence = cpu_to_be32(commit_transaction->t_tid); |
| 118 | tmp->h_commit_sec = cpu_to_be64(now.tv_sec); | ||
| 119 | tmp->h_commit_nsec = cpu_to_be32(now.tv_nsec); | ||
| 129 | 120 | ||
| 130 | if (JBD2_HAS_COMPAT_FEATURE(journal, | 121 | if (JBD2_HAS_COMPAT_FEATURE(journal, |
| 131 | JBD2_FEATURE_COMPAT_CHECKSUM)) { | 122 | JBD2_FEATURE_COMPAT_CHECKSUM)) { |
| @@ -136,8 +127,7 @@ static int journal_submit_commit_record(journal_t *journal, | |||
| 136 | 127 | ||
| 137 | JBUFFER_TRACE(descriptor, "submit commit block"); | 128 | JBUFFER_TRACE(descriptor, "submit commit block"); |
| 138 | lock_buffer(bh); | 129 | lock_buffer(bh); |
| 139 | get_bh(bh); | 130 | clear_buffer_dirty(bh); |
| 140 | set_buffer_dirty(bh); | ||
| 141 | set_buffer_uptodate(bh); | 131 | set_buffer_uptodate(bh); |
| 142 | bh->b_end_io = journal_end_buffer_io_sync; | 132 | bh->b_end_io = journal_end_buffer_io_sync; |
| 143 | 133 | ||
| @@ -157,12 +147,9 @@ static int journal_submit_commit_record(journal_t *journal, | |||
| 157 | * to remember if we sent a barrier request | 147 | * to remember if we sent a barrier request |
| 158 | */ | 148 | */ |
| 159 | if (ret == -EOPNOTSUPP && barrier_done) { | 149 | if (ret == -EOPNOTSUPP && barrier_done) { |
| 160 | char b[BDEVNAME_SIZE]; | ||
| 161 | |||
| 162 | printk(KERN_WARNING | 150 | printk(KERN_WARNING |
| 163 | "JBD: barrier-based sync failed on %s - " | 151 | "JBD: barrier-based sync failed on %s - " |
| 164 | "disabling barriers\n", | 152 | "disabling barriers\n", journal->j_devname); |
| 165 | bdevname(journal->j_dev, b)); | ||
| 166 | spin_lock(&journal->j_state_lock); | 153 | spin_lock(&journal->j_state_lock); |
| 167 | journal->j_flags &= ~JBD2_BARRIER; | 154 | journal->j_flags &= ~JBD2_BARRIER; |
| 168 | spin_unlock(&journal->j_state_lock); | 155 | spin_unlock(&journal->j_state_lock); |
| @@ -170,7 +157,7 @@ static int journal_submit_commit_record(journal_t *journal, | |||
| 170 | /* And try again, without the barrier */ | 157 | /* And try again, without the barrier */ |
| 171 | lock_buffer(bh); | 158 | lock_buffer(bh); |
| 172 | set_buffer_uptodate(bh); | 159 | set_buffer_uptodate(bh); |
| 173 | set_buffer_dirty(bh); | 160 | clear_buffer_dirty(bh); |
| 174 | ret = submit_bh(WRITE, bh); | 161 | ret = submit_bh(WRITE, bh); |
| 175 | } | 162 | } |
| 176 | *cbh = bh; | 163 | *cbh = bh; |
| @@ -197,159 +184,114 @@ static int journal_wait_on_commit_record(struct buffer_head *bh) | |||
| 197 | } | 184 | } |
| 198 | 185 | ||
| 199 | /* | 186 | /* |
| 200 | * Wait for all submitted IO to complete. | 187 | * write the filemap data using writepage() address_space_operations. |
| 188 | * We don't do block allocation here even for delalloc. We don't | ||
| 189 | * use writepages() because with delayed allocation we may be doing | ||
| 190 | * block allocation in writepages(). | ||
| 201 | */ | 191 | */ |
| 202 | static int journal_wait_on_locked_list(journal_t *journal, | 192 | static int journal_submit_inode_data_buffers(struct address_space *mapping) |
| 203 | transaction_t *commit_transaction) | ||
| 204 | { | 193 | { |
| 205 | int ret = 0; | 194 | int ret; |
| 206 | struct journal_head *jh; | 195 | struct writeback_control wbc = { |
| 207 | 196 | .sync_mode = WB_SYNC_ALL, | |
| 208 | while (commit_transaction->t_locked_list) { | 197 | .nr_to_write = mapping->nrpages * 2, |
| 209 | struct buffer_head *bh; | 198 | .range_start = 0, |
| 210 | 199 | .range_end = i_size_read(mapping->host), | |
| 211 | jh = commit_transaction->t_locked_list->b_tprev; | 200 | .for_writepages = 1, |
| 212 | bh = jh2bh(jh); | 201 | }; |
| 213 | get_bh(bh); | 202 | |
| 214 | if (buffer_locked(bh)) { | 203 | ret = generic_writepages(mapping, &wbc); |
| 215 | spin_unlock(&journal->j_list_lock); | ||
| 216 | wait_on_buffer(bh); | ||
| 217 | if (unlikely(!buffer_uptodate(bh))) | ||
| 218 | ret = -EIO; | ||
| 219 | spin_lock(&journal->j_list_lock); | ||
| 220 | } | ||
| 221 | if (!inverted_lock(journal, bh)) { | ||
| 222 | put_bh(bh); | ||
| 223 | spin_lock(&journal->j_list_lock); | ||
| 224 | continue; | ||
| 225 | } | ||
| 226 | if (buffer_jbd(bh) && jh->b_jlist == BJ_Locked) { | ||
| 227 | __jbd2_journal_unfile_buffer(jh); | ||
| 228 | jbd_unlock_bh_state(bh); | ||
| 229 | jbd2_journal_remove_journal_head(bh); | ||
| 230 | put_bh(bh); | ||
| 231 | } else { | ||
| 232 | jbd_unlock_bh_state(bh); | ||
| 233 | } | ||
| 234 | put_bh(bh); | ||
| 235 | cond_resched_lock(&journal->j_list_lock); | ||
| 236 | } | ||
| 237 | return ret; | 204 | return ret; |
| 238 | } | 205 | } |
| 239 | 206 | ||
| 240 | static void journal_do_submit_data(struct buffer_head **wbuf, int bufs) | 207 | /* |
| 208 | * Submit all the data buffers of inode associated with the transaction to | ||
| 209 | * disk. | ||
| 210 | * | ||
| 211 | * We are in a committing transaction. Therefore no new inode can be added to | ||
| 212 | * our inode list. We use JI_COMMIT_RUNNING flag to protect inode we currently | ||
| 213 | * operate on from being released while we write out pages. | ||
| 214 | */ | ||
| 215 | static int journal_submit_data_buffers(journal_t *journal, | ||
| 216 | transaction_t *commit_transaction) | ||
| 241 | { | 217 | { |
| 242 | int i; | 218 | struct jbd2_inode *jinode; |
| 219 | int err, ret = 0; | ||
| 220 | struct address_space *mapping; | ||
| 243 | 221 | ||
| 244 | for (i = 0; i < bufs; i++) { | 222 | spin_lock(&journal->j_list_lock); |
| 245 | wbuf[i]->b_end_io = end_buffer_write_sync; | 223 | list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) { |
| 246 | /* We use-up our safety reference in submit_bh() */ | 224 | mapping = jinode->i_vfs_inode->i_mapping; |
| 247 | submit_bh(WRITE, wbuf[i]); | 225 | jinode->i_flags |= JI_COMMIT_RUNNING; |
| 226 | spin_unlock(&journal->j_list_lock); | ||
| 227 | /* | ||
| 228 | * submit the inode data buffers. We use writepage | ||
| 229 | * instead of writepages, because writepages can do | ||
| 230 | * block allocation with delalloc. We need to write | ||
| 231 | * only allocated blocks here. | ||
| 232 | */ | ||
| 233 | err = journal_submit_inode_data_buffers(mapping); | ||
| 234 | if (!ret) | ||
| 235 | ret = err; | ||
| 236 | spin_lock(&journal->j_list_lock); | ||
| 237 | J_ASSERT(jinode->i_transaction == commit_transaction); | ||
| 238 | jinode->i_flags &= ~JI_COMMIT_RUNNING; | ||
| 239 | wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING); | ||
| 248 | } | 240 | } |
| 241 | spin_unlock(&journal->j_list_lock); | ||
| 242 | return ret; | ||
| 249 | } | 243 | } |
| 250 | 244 | ||
| 251 | /* | 245 | /* |
| 252 | * Submit all the data buffers to disk | 246 | * Wait for data submitted for writeout, refile inodes to proper |
| 247 | * transaction if needed. | ||
| 248 | * | ||
| 253 | */ | 249 | */ |
| 254 | static void journal_submit_data_buffers(journal_t *journal, | 250 | static int journal_finish_inode_data_buffers(journal_t *journal, |
| 255 | transaction_t *commit_transaction) | 251 | transaction_t *commit_transaction) |
| 256 | { | 252 | { |
| 257 | struct journal_head *jh; | 253 | struct jbd2_inode *jinode, *next_i; |
| 258 | struct buffer_head *bh; | 254 | int err, ret = 0; |
| 259 | int locked; | ||
| 260 | int bufs = 0; | ||
| 261 | struct buffer_head **wbuf = journal->j_wbuf; | ||
| 262 | 255 | ||
| 263 | /* | 256 | /* For locking, see the comment in journal_submit_data_buffers() */ |
| 264 | * Whenever we unlock the journal and sleep, things can get added | ||
| 265 | * onto ->t_sync_datalist, so we have to keep looping back to | ||
| 266 | * write_out_data until we *know* that the list is empty. | ||
| 267 | * | ||
| 268 | * Cleanup any flushed data buffers from the data list. Even in | ||
| 269 | * abort mode, we want to flush this out as soon as possible. | ||
| 270 | */ | ||
| 271 | write_out_data: | ||
| 272 | cond_resched(); | ||
| 273 | spin_lock(&journal->j_list_lock); | 257 | spin_lock(&journal->j_list_lock); |
| 274 | 258 | list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) { | |
| 275 | while (commit_transaction->t_sync_datalist) { | 259 | jinode->i_flags |= JI_COMMIT_RUNNING; |
| 276 | jh = commit_transaction->t_sync_datalist; | 260 | spin_unlock(&journal->j_list_lock); |
| 277 | bh = jh2bh(jh); | 261 | err = filemap_fdatawait(jinode->i_vfs_inode->i_mapping); |
| 278 | locked = 0; | 262 | if (err) { |
| 279 | 263 | /* | |
| 280 | /* Get reference just to make sure buffer does not disappear | 264 | * Because AS_EIO is cleared by |
| 281 | * when we are forced to drop various locks */ | 265 | * wait_on_page_writeback_range(), set it again so |
| 282 | get_bh(bh); | 266 | * that user process can get -EIO from fsync(). |
| 283 | /* If the buffer is dirty, we need to submit IO and hence | 267 | */ |
| 284 | * we need the buffer lock. We try to lock the buffer without | 268 | set_bit(AS_EIO, |
| 285 | * blocking. If we fail, we need to drop j_list_lock and do | 269 | &jinode->i_vfs_inode->i_mapping->flags); |
| 286 | * blocking lock_buffer(). | 270 | |
| 287 | */ | 271 | if (!ret) |
| 288 | if (buffer_dirty(bh)) { | 272 | ret = err; |
| 289 | if (test_set_buffer_locked(bh)) { | ||
| 290 | BUFFER_TRACE(bh, "needs blocking lock"); | ||
| 291 | spin_unlock(&journal->j_list_lock); | ||
| 292 | /* Write out all data to prevent deadlocks */ | ||
| 293 | journal_do_submit_data(wbuf, bufs); | ||
| 294 | bufs = 0; | ||
| 295 | lock_buffer(bh); | ||
| 296 | spin_lock(&journal->j_list_lock); | ||
| 297 | } | ||
| 298 | locked = 1; | ||
| 299 | } | ||
| 300 | /* We have to get bh_state lock. Again out of order, sigh. */ | ||
| 301 | if (!inverted_lock(journal, bh)) { | ||
| 302 | jbd_lock_bh_state(bh); | ||
| 303 | spin_lock(&journal->j_list_lock); | ||
| 304 | } | ||
| 305 | /* Someone already cleaned up the buffer? */ | ||
| 306 | if (!buffer_jbd(bh) | ||
| 307 | || jh->b_transaction != commit_transaction | ||
| 308 | || jh->b_jlist != BJ_SyncData) { | ||
| 309 | jbd_unlock_bh_state(bh); | ||
| 310 | if (locked) | ||
| 311 | unlock_buffer(bh); | ||
| 312 | BUFFER_TRACE(bh, "already cleaned up"); | ||
| 313 | put_bh(bh); | ||
| 314 | continue; | ||
| 315 | } | ||
| 316 | if (locked && test_clear_buffer_dirty(bh)) { | ||
| 317 | BUFFER_TRACE(bh, "needs writeout, adding to array"); | ||
| 318 | wbuf[bufs++] = bh; | ||
| 319 | __jbd2_journal_file_buffer(jh, commit_transaction, | ||
| 320 | BJ_Locked); | ||
| 321 | jbd_unlock_bh_state(bh); | ||
| 322 | if (bufs == journal->j_wbufsize) { | ||
| 323 | spin_unlock(&journal->j_list_lock); | ||
| 324 | journal_do_submit_data(wbuf, bufs); | ||
| 325 | bufs = 0; | ||
| 326 | goto write_out_data; | ||
| 327 | } | ||
| 328 | } else if (!locked && buffer_locked(bh)) { | ||
| 329 | __jbd2_journal_file_buffer(jh, commit_transaction, | ||
| 330 | BJ_Locked); | ||
| 331 | jbd_unlock_bh_state(bh); | ||
| 332 | put_bh(bh); | ||
| 333 | } else { | ||
| 334 | BUFFER_TRACE(bh, "writeout complete: unfile"); | ||
| 335 | __jbd2_journal_unfile_buffer(jh); | ||
| 336 | jbd_unlock_bh_state(bh); | ||
| 337 | if (locked) | ||
| 338 | unlock_buffer(bh); | ||
| 339 | jbd2_journal_remove_journal_head(bh); | ||
| 340 | /* Once for our safety reference, once for | ||
| 341 | * jbd2_journal_remove_journal_head() */ | ||
| 342 | put_bh(bh); | ||
| 343 | put_bh(bh); | ||
| 344 | } | 273 | } |
| 274 | spin_lock(&journal->j_list_lock); | ||
| 275 | jinode->i_flags &= ~JI_COMMIT_RUNNING; | ||
| 276 | wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING); | ||
| 277 | } | ||
| 345 | 278 | ||
| 346 | if (need_resched() || spin_needbreak(&journal->j_list_lock)) { | 279 | /* Now refile inode to proper lists */ |
| 347 | spin_unlock(&journal->j_list_lock); | 280 | list_for_each_entry_safe(jinode, next_i, |
| 348 | goto write_out_data; | 281 | &commit_transaction->t_inode_list, i_list) { |
| 282 | list_del(&jinode->i_list); | ||
| 283 | if (jinode->i_next_transaction) { | ||
| 284 | jinode->i_transaction = jinode->i_next_transaction; | ||
| 285 | jinode->i_next_transaction = NULL; | ||
| 286 | list_add(&jinode->i_list, | ||
| 287 | &jinode->i_transaction->t_inode_list); | ||
| 288 | } else { | ||
| 289 | jinode->i_transaction = NULL; | ||
| 349 | } | 290 | } |
| 350 | } | 291 | } |
| 351 | spin_unlock(&journal->j_list_lock); | 292 | spin_unlock(&journal->j_list_lock); |
| 352 | journal_do_submit_data(wbuf, bufs); | 293 | |
| 294 | return ret; | ||
| 353 | } | 295 | } |
| 354 | 296 | ||
| 355 | static __u32 jbd2_checksum_data(__u32 crc32_sum, struct buffer_head *bh) | 297 | static __u32 jbd2_checksum_data(__u32 crc32_sum, struct buffer_head *bh) |
| @@ -426,6 +368,8 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
| 426 | commit_transaction = journal->j_running_transaction; | 368 | commit_transaction = journal->j_running_transaction; |
| 427 | J_ASSERT(commit_transaction->t_state == T_RUNNING); | 369 | J_ASSERT(commit_transaction->t_state == T_RUNNING); |
| 428 | 370 | ||
| 371 | trace_mark(jbd2_start_commit, "dev %s transaction %d", | ||
| 372 | journal->j_devname, commit_transaction->t_tid); | ||
| 429 | jbd_debug(1, "JBD: starting commit of transaction %d\n", | 373 | jbd_debug(1, "JBD: starting commit of transaction %d\n", |
| 430 | commit_transaction->t_tid); | 374 | commit_transaction->t_tid); |
| 431 | 375 | ||
| @@ -524,21 +468,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
| 524 | * Now start flushing things to disk, in the order they appear | 468 | * Now start flushing things to disk, in the order they appear |
| 525 | * on the transaction lists. Data blocks go first. | 469 | * on the transaction lists. Data blocks go first. |
| 526 | */ | 470 | */ |
| 527 | err = 0; | 471 | err = journal_submit_data_buffers(journal, commit_transaction); |
| 528 | journal_submit_data_buffers(journal, commit_transaction); | ||
| 529 | |||
| 530 | /* | ||
| 531 | * Wait for all previously submitted IO to complete if commit | ||
| 532 | * record is to be written synchronously. | ||
| 533 | */ | ||
| 534 | spin_lock(&journal->j_list_lock); | ||
| 535 | if (!JBD2_HAS_INCOMPAT_FEATURE(journal, | ||
| 536 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) | ||
| 537 | err = journal_wait_on_locked_list(journal, | ||
| 538 | commit_transaction); | ||
| 539 | |||
| 540 | spin_unlock(&journal->j_list_lock); | ||
| 541 | |||
| 542 | if (err) | 472 | if (err) |
| 543 | jbd2_journal_abort(journal, err); | 473 | jbd2_journal_abort(journal, err); |
| 544 | 474 | ||
| @@ -547,16 +477,6 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
| 547 | jbd_debug(3, "JBD: commit phase 2\n"); | 477 | jbd_debug(3, "JBD: commit phase 2\n"); |
| 548 | 478 | ||
| 549 | /* | 479 | /* |
| 550 | * If we found any dirty or locked buffers, then we should have | ||
| 551 | * looped back up to the write_out_data label. If there weren't | ||
| 552 | * any then journal_clean_data_list should have wiped the list | ||
| 553 | * clean by now, so check that it is in fact empty. | ||
| 554 | */ | ||
| 555 | J_ASSERT (commit_transaction->t_sync_datalist == NULL); | ||
| 556 | |||
| 557 | jbd_debug (3, "JBD: commit phase 3\n"); | ||
| 558 | |||
| 559 | /* | ||
| 560 | * Way to go: we have now written out all of the data for a | 480 | * Way to go: we have now written out all of the data for a |
| 561 | * transaction! Now comes the tricky part: we need to write out | 481 | * transaction! Now comes the tricky part: we need to write out |
| 562 | * metadata. Loop over the transaction's entire buffer list: | 482 | * metadata. Loop over the transaction's entire buffer list: |
| @@ -574,6 +494,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
| 574 | J_ASSERT(commit_transaction->t_nr_buffers <= | 494 | J_ASSERT(commit_transaction->t_nr_buffers <= |
| 575 | commit_transaction->t_outstanding_credits); | 495 | commit_transaction->t_outstanding_credits); |
| 576 | 496 | ||
| 497 | err = 0; | ||
| 577 | descriptor = NULL; | 498 | descriptor = NULL; |
| 578 | bufs = 0; | 499 | bufs = 0; |
| 579 | while (commit_transaction->t_buffers) { | 500 | while (commit_transaction->t_buffers) { |
| @@ -583,9 +504,10 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
| 583 | jh = commit_transaction->t_buffers; | 504 | jh = commit_transaction->t_buffers; |
| 584 | 505 | ||
| 585 | /* If we're in abort mode, we just un-journal the buffer and | 506 | /* If we're in abort mode, we just un-journal the buffer and |
| 586 | release it for background writing. */ | 507 | release it. */ |
| 587 | 508 | ||
| 588 | if (is_journal_aborted(journal)) { | 509 | if (is_journal_aborted(journal)) { |
| 510 | clear_buffer_jbddirty(jh2bh(jh)); | ||
| 589 | JBUFFER_TRACE(jh, "journal is aborting: refile"); | 511 | JBUFFER_TRACE(jh, "journal is aborting: refile"); |
| 590 | jbd2_journal_refile_buffer(journal, jh); | 512 | jbd2_journal_refile_buffer(journal, jh); |
| 591 | /* If that was the last one, we need to clean up | 513 | /* If that was the last one, we need to clean up |
| @@ -748,13 +670,23 @@ start_journal_io: | |||
| 748 | &cbh, crc32_sum); | 670 | &cbh, crc32_sum); |
| 749 | if (err) | 671 | if (err) |
| 750 | __jbd2_journal_abort_hard(journal); | 672 | __jbd2_journal_abort_hard(journal); |
| 673 | } | ||
| 751 | 674 | ||
| 752 | spin_lock(&journal->j_list_lock); | 675 | /* |
| 753 | err = journal_wait_on_locked_list(journal, | 676 | * This is the right place to wait for data buffers both for ASYNC |
| 754 | commit_transaction); | 677 | * and !ASYNC commit. If commit is ASYNC, we need to wait only after |
| 755 | spin_unlock(&journal->j_list_lock); | 678 | * the commit block went to disk (which happens above). If commit is |
| 756 | if (err) | 679 | * SYNC, we need to wait for data buffers before we start writing |
| 757 | __jbd2_journal_abort_hard(journal); | 680 | * commit block, which happens below in such setting. |
| 681 | */ | ||
| 682 | err = journal_finish_inode_data_buffers(journal, commit_transaction); | ||
| 683 | if (err) { | ||
| 684 | printk(KERN_WARNING | ||
| 685 | "JBD2: Detected IO errors while flushing file data " | ||
| 686 | "on %s\n", journal->j_devname); | ||
| 687 | if (journal->j_flags & JBD2_ABORT_ON_SYNCDATA_ERR) | ||
| 688 | jbd2_journal_abort(journal, err); | ||
| 689 | err = 0; | ||
| 758 | } | 690 | } |
| 759 | 691 | ||
| 760 | /* Lo and behold: we have just managed to send a transaction to | 692 | /* Lo and behold: we have just managed to send a transaction to |
| @@ -768,7 +700,7 @@ start_journal_io: | |||
| 768 | so we incur less scheduling load. | 700 | so we incur less scheduling load. |
| 769 | */ | 701 | */ |
| 770 | 702 | ||
| 771 | jbd_debug(3, "JBD: commit phase 4\n"); | 703 | jbd_debug(3, "JBD: commit phase 3\n"); |
| 772 | 704 | ||
| 773 | /* | 705 | /* |
| 774 | * akpm: these are BJ_IO, and j_list_lock is not needed. | 706 | * akpm: these are BJ_IO, and j_list_lock is not needed. |
| @@ -827,7 +759,7 @@ wait_for_iobuf: | |||
| 827 | 759 | ||
| 828 | J_ASSERT (commit_transaction->t_shadow_list == NULL); | 760 | J_ASSERT (commit_transaction->t_shadow_list == NULL); |
| 829 | 761 | ||
| 830 | jbd_debug(3, "JBD: commit phase 5\n"); | 762 | jbd_debug(3, "JBD: commit phase 4\n"); |
| 831 | 763 | ||
| 832 | /* Here we wait for the revoke record and descriptor record buffers */ | 764 | /* Here we wait for the revoke record and descriptor record buffers */ |
| 833 | wait_for_ctlbuf: | 765 | wait_for_ctlbuf: |
| @@ -854,7 +786,10 @@ wait_for_iobuf: | |||
| 854 | /* AKPM: bforget here */ | 786 | /* AKPM: bforget here */ |
| 855 | } | 787 | } |
| 856 | 788 | ||
| 857 | jbd_debug(3, "JBD: commit phase 6\n"); | 789 | if (err) |
| 790 | jbd2_journal_abort(journal, err); | ||
| 791 | |||
| 792 | jbd_debug(3, "JBD: commit phase 5\n"); | ||
| 858 | 793 | ||
| 859 | if (!JBD2_HAS_INCOMPAT_FEATURE(journal, | 794 | if (!JBD2_HAS_INCOMPAT_FEATURE(journal, |
| 860 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { | 795 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { |
| @@ -874,9 +809,9 @@ wait_for_iobuf: | |||
| 874 | transaction can be removed from any checkpoint list it was on | 809 | transaction can be removed from any checkpoint list it was on |
| 875 | before. */ | 810 | before. */ |
| 876 | 811 | ||
| 877 | jbd_debug(3, "JBD: commit phase 7\n"); | 812 | jbd_debug(3, "JBD: commit phase 6\n"); |
| 878 | 813 | ||
| 879 | J_ASSERT(commit_transaction->t_sync_datalist == NULL); | 814 | J_ASSERT(list_empty(&commit_transaction->t_inode_list)); |
| 880 | J_ASSERT(commit_transaction->t_buffers == NULL); | 815 | J_ASSERT(commit_transaction->t_buffers == NULL); |
| 881 | J_ASSERT(commit_transaction->t_checkpoint_list == NULL); | 816 | J_ASSERT(commit_transaction->t_checkpoint_list == NULL); |
| 882 | J_ASSERT(commit_transaction->t_iobuf_list == NULL); | 817 | J_ASSERT(commit_transaction->t_iobuf_list == NULL); |
| @@ -952,6 +887,8 @@ restart_loop: | |||
| 952 | if (buffer_jbddirty(bh)) { | 887 | if (buffer_jbddirty(bh)) { |
| 953 | JBUFFER_TRACE(jh, "add to new checkpointing trans"); | 888 | JBUFFER_TRACE(jh, "add to new checkpointing trans"); |
| 954 | __jbd2_journal_insert_checkpoint(jh, commit_transaction); | 889 | __jbd2_journal_insert_checkpoint(jh, commit_transaction); |
| 890 | if (is_journal_aborted(journal)) | ||
| 891 | clear_buffer_jbddirty(bh); | ||
| 955 | JBUFFER_TRACE(jh, "refile for checkpoint writeback"); | 892 | JBUFFER_TRACE(jh, "refile for checkpoint writeback"); |
| 956 | __jbd2_journal_refile_buffer(jh); | 893 | __jbd2_journal_refile_buffer(jh); |
| 957 | jbd_unlock_bh_state(bh); | 894 | jbd_unlock_bh_state(bh); |
| @@ -997,7 +934,7 @@ restart_loop: | |||
| 997 | 934 | ||
| 998 | /* Done with this transaction! */ | 935 | /* Done with this transaction! */ |
| 999 | 936 | ||
| 1000 | jbd_debug(3, "JBD: commit phase 8\n"); | 937 | jbd_debug(3, "JBD: commit phase 7\n"); |
| 1001 | 938 | ||
| 1002 | J_ASSERT(commit_transaction->t_state == T_COMMIT); | 939 | J_ASSERT(commit_transaction->t_state == T_COMMIT); |
| 1003 | 940 | ||
| @@ -1058,6 +995,9 @@ restart_loop: | |||
| 1058 | } | 995 | } |
| 1059 | spin_unlock(&journal->j_list_lock); | 996 | spin_unlock(&journal->j_list_lock); |
| 1060 | 997 | ||
| 998 | trace_mark(jbd2_end_commit, "dev %s transaction %d head %d", | ||
| 999 | journal->j_devname, commit_transaction->t_tid, | ||
| 1000 | journal->j_tail_sequence); | ||
| 1061 | jbd_debug(1, "JBD: commit %d complete, head %d\n", | 1001 | jbd_debug(1, "JBD: commit %d complete, head %d\n", |
| 1062 | journal->j_commit_sequence, journal->j_tail_sequence); | 1002 | journal->j_commit_sequence, journal->j_tail_sequence); |
| 1063 | 1003 | ||
