diff options
Diffstat (limited to 'fs/ocfs2/journal.c')
-rw-r--r-- | fs/ocfs2/journal.c | 364 |
1 files changed, 300 insertions, 64 deletions
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 99fe9d584f3c..57d7d25a2b9a 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c | |||
@@ -35,6 +35,7 @@ | |||
35 | #include "ocfs2.h" | 35 | #include "ocfs2.h" |
36 | 36 | ||
37 | #include "alloc.h" | 37 | #include "alloc.h" |
38 | #include "blockcheck.h" | ||
38 | #include "dir.h" | 39 | #include "dir.h" |
39 | #include "dlmglue.h" | 40 | #include "dlmglue.h" |
40 | #include "extent_map.h" | 41 | #include "extent_map.h" |
@@ -45,6 +46,7 @@ | |||
45 | #include "slot_map.h" | 46 | #include "slot_map.h" |
46 | #include "super.h" | 47 | #include "super.h" |
47 | #include "sysfile.h" | 48 | #include "sysfile.h" |
49 | #include "quota.h" | ||
48 | 50 | ||
49 | #include "buffer_head_io.h" | 51 | #include "buffer_head_io.h" |
50 | 52 | ||
@@ -52,10 +54,10 @@ DEFINE_SPINLOCK(trans_inc_lock); | |||
52 | 54 | ||
53 | static int ocfs2_force_read_journal(struct inode *inode); | 55 | static int ocfs2_force_read_journal(struct inode *inode); |
54 | static int ocfs2_recover_node(struct ocfs2_super *osb, | 56 | static int ocfs2_recover_node(struct ocfs2_super *osb, |
55 | int node_num); | 57 | int node_num, int slot_num); |
56 | static int __ocfs2_recovery_thread(void *arg); | 58 | static int __ocfs2_recovery_thread(void *arg); |
57 | static int ocfs2_commit_cache(struct ocfs2_super *osb); | 59 | static int ocfs2_commit_cache(struct ocfs2_super *osb); |
58 | static int ocfs2_wait_on_mount(struct ocfs2_super *osb); | 60 | static int __ocfs2_wait_on_mount(struct ocfs2_super *osb, int quota); |
59 | static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb, | 61 | static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb, |
60 | int dirty, int replayed); | 62 | int dirty, int replayed); |
61 | static int ocfs2_trylock_journal(struct ocfs2_super *osb, | 63 | static int ocfs2_trylock_journal(struct ocfs2_super *osb, |
@@ -64,6 +66,17 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb, | |||
64 | int slot); | 66 | int slot); |
65 | static int ocfs2_commit_thread(void *arg); | 67 | static int ocfs2_commit_thread(void *arg); |
66 | 68 | ||
69 | static inline int ocfs2_wait_on_mount(struct ocfs2_super *osb) | ||
70 | { | ||
71 | return __ocfs2_wait_on_mount(osb, 0); | ||
72 | } | ||
73 | |||
74 | static inline int ocfs2_wait_on_quotas(struct ocfs2_super *osb) | ||
75 | { | ||
76 | return __ocfs2_wait_on_mount(osb, 1); | ||
77 | } | ||
78 | |||
79 | |||
67 | 80 | ||
68 | /* | 81 | /* |
69 | * The recovery_list is a simple linked list of node numbers to recover. | 82 | * The recovery_list is a simple linked list of node numbers to recover. |
@@ -256,11 +269,9 @@ handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs) | |||
256 | BUG_ON(osb->journal->j_state == OCFS2_JOURNAL_FREE); | 269 | BUG_ON(osb->journal->j_state == OCFS2_JOURNAL_FREE); |
257 | BUG_ON(max_buffs <= 0); | 270 | BUG_ON(max_buffs <= 0); |
258 | 271 | ||
259 | /* JBD might support this, but our journalling code doesn't yet. */ | 272 | /* Nested transaction? Just return the handle... */ |
260 | if (journal_current_handle()) { | 273 | if (journal_current_handle()) |
261 | mlog(ML_ERROR, "Recursive transaction attempted!\n"); | 274 | return jbd2_journal_start(journal, max_buffs); |
262 | BUG(); | ||
263 | } | ||
264 | 275 | ||
265 | down_read(&osb->journal->j_trans_barrier); | 276 | down_read(&osb->journal->j_trans_barrier); |
266 | 277 | ||
@@ -285,16 +296,18 @@ handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs) | |||
285 | int ocfs2_commit_trans(struct ocfs2_super *osb, | 296 | int ocfs2_commit_trans(struct ocfs2_super *osb, |
286 | handle_t *handle) | 297 | handle_t *handle) |
287 | { | 298 | { |
288 | int ret; | 299 | int ret, nested; |
289 | struct ocfs2_journal *journal = osb->journal; | 300 | struct ocfs2_journal *journal = osb->journal; |
290 | 301 | ||
291 | BUG_ON(!handle); | 302 | BUG_ON(!handle); |
292 | 303 | ||
304 | nested = handle->h_ref > 1; | ||
293 | ret = jbd2_journal_stop(handle); | 305 | ret = jbd2_journal_stop(handle); |
294 | if (ret < 0) | 306 | if (ret < 0) |
295 | mlog_errno(ret); | 307 | mlog_errno(ret); |
296 | 308 | ||
297 | up_read(&journal->j_trans_barrier); | 309 | if (!nested) |
310 | up_read(&journal->j_trans_barrier); | ||
298 | 311 | ||
299 | return ret; | 312 | return ret; |
300 | } | 313 | } |
@@ -357,10 +370,137 @@ bail: | |||
357 | return status; | 370 | return status; |
358 | } | 371 | } |
359 | 372 | ||
360 | int ocfs2_journal_access(handle_t *handle, | 373 | struct ocfs2_triggers { |
361 | struct inode *inode, | 374 | struct jbd2_buffer_trigger_type ot_triggers; |
362 | struct buffer_head *bh, | 375 | int ot_offset; |
363 | int type) | 376 | }; |
377 | |||
378 | static inline struct ocfs2_triggers *to_ocfs2_trigger(struct jbd2_buffer_trigger_type *triggers) | ||
379 | { | ||
380 | return container_of(triggers, struct ocfs2_triggers, ot_triggers); | ||
381 | } | ||
382 | |||
383 | static void ocfs2_commit_trigger(struct jbd2_buffer_trigger_type *triggers, | ||
384 | struct buffer_head *bh, | ||
385 | void *data, size_t size) | ||
386 | { | ||
387 | struct ocfs2_triggers *ot = to_ocfs2_trigger(triggers); | ||
388 | |||
389 | /* | ||
390 | * We aren't guaranteed to have the superblock here, so we | ||
391 | * must unconditionally compute the ecc data. | ||
392 | * __ocfs2_journal_access() will only set the triggers if | ||
393 | * metaecc is enabled. | ||
394 | */ | ||
395 | ocfs2_block_check_compute(data, size, data + ot->ot_offset); | ||
396 | } | ||
397 | |||
398 | /* | ||
399 | * Quota blocks have their own trigger because the struct ocfs2_block_check | ||
400 | * offset depends on the blocksize. | ||
401 | */ | ||
402 | static void ocfs2_dq_commit_trigger(struct jbd2_buffer_trigger_type *triggers, | ||
403 | struct buffer_head *bh, | ||
404 | void *data, size_t size) | ||
405 | { | ||
406 | struct ocfs2_disk_dqtrailer *dqt = | ||
407 | ocfs2_block_dqtrailer(size, data); | ||
408 | |||
409 | /* | ||
410 | * We aren't guaranteed to have the superblock here, so we | ||
411 | * must unconditionally compute the ecc data. | ||
412 | * __ocfs2_journal_access() will only set the triggers if | ||
413 | * metaecc is enabled. | ||
414 | */ | ||
415 | ocfs2_block_check_compute(data, size, &dqt->dq_check); | ||
416 | } | ||
417 | |||
418 | /* | ||
419 | * Directory blocks also have their own trigger because the | ||
420 | * struct ocfs2_block_check offset depends on the blocksize. | ||
421 | */ | ||
422 | static void ocfs2_db_commit_trigger(struct jbd2_buffer_trigger_type *triggers, | ||
423 | struct buffer_head *bh, | ||
424 | void *data, size_t size) | ||
425 | { | ||
426 | struct ocfs2_dir_block_trailer *trailer = | ||
427 | ocfs2_dir_trailer_from_size(size, data); | ||
428 | |||
429 | /* | ||
430 | * We aren't guaranteed to have the superblock here, so we | ||
431 | * must unconditionally compute the ecc data. | ||
432 | * __ocfs2_journal_access() will only set the triggers if | ||
433 | * metaecc is enabled. | ||
434 | */ | ||
435 | ocfs2_block_check_compute(data, size, &trailer->db_check); | ||
436 | } | ||
437 | |||
438 | static void ocfs2_abort_trigger(struct jbd2_buffer_trigger_type *triggers, | ||
439 | struct buffer_head *bh) | ||
440 | { | ||
441 | mlog(ML_ERROR, | ||
442 | "ocfs2_abort_trigger called by JBD2. bh = 0x%lx, " | ||
443 | "bh->b_blocknr = %llu\n", | ||
444 | (unsigned long)bh, | ||
445 | (unsigned long long)bh->b_blocknr); | ||
446 | |||
447 | /* We aren't guaranteed to have the superblock here - but if we | ||
448 | * don't, it'll just crash. */ | ||
449 | ocfs2_error(bh->b_assoc_map->host->i_sb, | ||
450 | "JBD2 has aborted our journal, ocfs2 cannot continue\n"); | ||
451 | } | ||
452 | |||
453 | static struct ocfs2_triggers di_triggers = { | ||
454 | .ot_triggers = { | ||
455 | .t_commit = ocfs2_commit_trigger, | ||
456 | .t_abort = ocfs2_abort_trigger, | ||
457 | }, | ||
458 | .ot_offset = offsetof(struct ocfs2_dinode, i_check), | ||
459 | }; | ||
460 | |||
461 | static struct ocfs2_triggers eb_triggers = { | ||
462 | .ot_triggers = { | ||
463 | .t_commit = ocfs2_commit_trigger, | ||
464 | .t_abort = ocfs2_abort_trigger, | ||
465 | }, | ||
466 | .ot_offset = offsetof(struct ocfs2_extent_block, h_check), | ||
467 | }; | ||
468 | |||
469 | static struct ocfs2_triggers gd_triggers = { | ||
470 | .ot_triggers = { | ||
471 | .t_commit = ocfs2_commit_trigger, | ||
472 | .t_abort = ocfs2_abort_trigger, | ||
473 | }, | ||
474 | .ot_offset = offsetof(struct ocfs2_group_desc, bg_check), | ||
475 | }; | ||
476 | |||
477 | static struct ocfs2_triggers db_triggers = { | ||
478 | .ot_triggers = { | ||
479 | .t_commit = ocfs2_db_commit_trigger, | ||
480 | .t_abort = ocfs2_abort_trigger, | ||
481 | }, | ||
482 | }; | ||
483 | |||
484 | static struct ocfs2_triggers xb_triggers = { | ||
485 | .ot_triggers = { | ||
486 | .t_commit = ocfs2_commit_trigger, | ||
487 | .t_abort = ocfs2_abort_trigger, | ||
488 | }, | ||
489 | .ot_offset = offsetof(struct ocfs2_xattr_block, xb_check), | ||
490 | }; | ||
491 | |||
492 | static struct ocfs2_triggers dq_triggers = { | ||
493 | .ot_triggers = { | ||
494 | .t_commit = ocfs2_dq_commit_trigger, | ||
495 | .t_abort = ocfs2_abort_trigger, | ||
496 | }, | ||
497 | }; | ||
498 | |||
499 | static int __ocfs2_journal_access(handle_t *handle, | ||
500 | struct inode *inode, | ||
501 | struct buffer_head *bh, | ||
502 | struct ocfs2_triggers *triggers, | ||
503 | int type) | ||
364 | { | 504 | { |
365 | int status; | 505 | int status; |
366 | 506 | ||
@@ -406,6 +546,8 @@ int ocfs2_journal_access(handle_t *handle, | |||
406 | status = -EINVAL; | 546 | status = -EINVAL; |
407 | mlog(ML_ERROR, "Uknown access type!\n"); | 547 | mlog(ML_ERROR, "Uknown access type!\n"); |
408 | } | 548 | } |
549 | if (!status && ocfs2_meta_ecc(OCFS2_SB(inode->i_sb)) && triggers) | ||
550 | jbd2_journal_set_triggers(bh, &triggers->ot_triggers); | ||
409 | mutex_unlock(&OCFS2_I(inode)->ip_io_mutex); | 551 | mutex_unlock(&OCFS2_I(inode)->ip_io_mutex); |
410 | 552 | ||
411 | if (status < 0) | 553 | if (status < 0) |
@@ -416,6 +558,54 @@ int ocfs2_journal_access(handle_t *handle, | |||
416 | return status; | 558 | return status; |
417 | } | 559 | } |
418 | 560 | ||
561 | int ocfs2_journal_access_di(handle_t *handle, struct inode *inode, | ||
562 | struct buffer_head *bh, int type) | ||
563 | { | ||
564 | return __ocfs2_journal_access(handle, inode, bh, &di_triggers, | ||
565 | type); | ||
566 | } | ||
567 | |||
568 | int ocfs2_journal_access_eb(handle_t *handle, struct inode *inode, | ||
569 | struct buffer_head *bh, int type) | ||
570 | { | ||
571 | return __ocfs2_journal_access(handle, inode, bh, &eb_triggers, | ||
572 | type); | ||
573 | } | ||
574 | |||
575 | int ocfs2_journal_access_gd(handle_t *handle, struct inode *inode, | ||
576 | struct buffer_head *bh, int type) | ||
577 | { | ||
578 | return __ocfs2_journal_access(handle, inode, bh, &gd_triggers, | ||
579 | type); | ||
580 | } | ||
581 | |||
582 | int ocfs2_journal_access_db(handle_t *handle, struct inode *inode, | ||
583 | struct buffer_head *bh, int type) | ||
584 | { | ||
585 | return __ocfs2_journal_access(handle, inode, bh, &db_triggers, | ||
586 | type); | ||
587 | } | ||
588 | |||
589 | int ocfs2_journal_access_xb(handle_t *handle, struct inode *inode, | ||
590 | struct buffer_head *bh, int type) | ||
591 | { | ||
592 | return __ocfs2_journal_access(handle, inode, bh, &xb_triggers, | ||
593 | type); | ||
594 | } | ||
595 | |||
596 | int ocfs2_journal_access_dq(handle_t *handle, struct inode *inode, | ||
597 | struct buffer_head *bh, int type) | ||
598 | { | ||
599 | return __ocfs2_journal_access(handle, inode, bh, &dq_triggers, | ||
600 | type); | ||
601 | } | ||
602 | |||
603 | int ocfs2_journal_access(handle_t *handle, struct inode *inode, | ||
604 | struct buffer_head *bh, int type) | ||
605 | { | ||
606 | return __ocfs2_journal_access(handle, inode, bh, NULL, type); | ||
607 | } | ||
608 | |||
419 | int ocfs2_journal_dirty(handle_t *handle, | 609 | int ocfs2_journal_dirty(handle_t *handle, |
420 | struct buffer_head *bh) | 610 | struct buffer_head *bh) |
421 | { | 611 | { |
@@ -434,20 +624,6 @@ int ocfs2_journal_dirty(handle_t *handle, | |||
434 | return status; | 624 | return status; |
435 | } | 625 | } |
436 | 626 | ||
437 | #ifdef CONFIG_OCFS2_COMPAT_JBD | ||
438 | int ocfs2_journal_dirty_data(handle_t *handle, | ||
439 | struct buffer_head *bh) | ||
440 | { | ||
441 | int err = journal_dirty_data(handle, bh); | ||
442 | if (err) | ||
443 | mlog_errno(err); | ||
444 | /* TODO: When we can handle it, abort the handle and go RO on | ||
445 | * error here. */ | ||
446 | |||
447 | return err; | ||
448 | } | ||
449 | #endif | ||
450 | |||
451 | #define OCFS2_DEFAULT_COMMIT_INTERVAL (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE) | 627 | #define OCFS2_DEFAULT_COMMIT_INTERVAL (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE) |
452 | 628 | ||
453 | void ocfs2_set_journal_params(struct ocfs2_super *osb) | 629 | void ocfs2_set_journal_params(struct ocfs2_super *osb) |
@@ -587,17 +763,11 @@ static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb, | |||
587 | mlog_entry_void(); | 763 | mlog_entry_void(); |
588 | 764 | ||
589 | fe = (struct ocfs2_dinode *)bh->b_data; | 765 | fe = (struct ocfs2_dinode *)bh->b_data; |
590 | if (!OCFS2_IS_VALID_DINODE(fe)) { | 766 | |
591 | /* This is called from startup/shutdown which will | 767 | /* The journal bh on the osb always comes from ocfs2_journal_init() |
592 | * handle the errors in a specific manner, so no need | 768 | * and was validated there inside ocfs2_inode_lock_full(). It's a |
593 | * to call ocfs2_error() here. */ | 769 | * code bug if we mess it up. */ |
594 | mlog(ML_ERROR, "Journal dinode %llu has invalid " | 770 | BUG_ON(!OCFS2_IS_VALID_DINODE(fe)); |
595 | "signature: %.*s", | ||
596 | (unsigned long long)le64_to_cpu(fe->i_blkno), 7, | ||
597 | fe->i_signature); | ||
598 | status = -EIO; | ||
599 | goto out; | ||
600 | } | ||
601 | 771 | ||
602 | flags = le32_to_cpu(fe->id1.journal1.ij_flags); | 772 | flags = le32_to_cpu(fe->id1.journal1.ij_flags); |
603 | if (dirty) | 773 | if (dirty) |
@@ -609,11 +779,11 @@ static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb, | |||
609 | if (replayed) | 779 | if (replayed) |
610 | ocfs2_bump_recovery_generation(fe); | 780 | ocfs2_bump_recovery_generation(fe); |
611 | 781 | ||
782 | ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &fe->i_check); | ||
612 | status = ocfs2_write_block(osb, bh, journal->j_inode); | 783 | status = ocfs2_write_block(osb, bh, journal->j_inode); |
613 | if (status < 0) | 784 | if (status < 0) |
614 | mlog_errno(status); | 785 | mlog_errno(status); |
615 | 786 | ||
616 | out: | ||
617 | mlog_exit(status); | 787 | mlog_exit(status); |
618 | return status; | 788 | return status; |
619 | } | 789 | } |
@@ -878,6 +1048,7 @@ struct ocfs2_la_recovery_item { | |||
878 | int lri_slot; | 1048 | int lri_slot; |
879 | struct ocfs2_dinode *lri_la_dinode; | 1049 | struct ocfs2_dinode *lri_la_dinode; |
880 | struct ocfs2_dinode *lri_tl_dinode; | 1050 | struct ocfs2_dinode *lri_tl_dinode; |
1051 | struct ocfs2_quota_recovery *lri_qrec; | ||
881 | }; | 1052 | }; |
882 | 1053 | ||
883 | /* Does the second half of the recovery process. By this point, the | 1054 | /* Does the second half of the recovery process. By this point, the |
@@ -898,6 +1069,7 @@ void ocfs2_complete_recovery(struct work_struct *work) | |||
898 | struct ocfs2_super *osb = journal->j_osb; | 1069 | struct ocfs2_super *osb = journal->j_osb; |
899 | struct ocfs2_dinode *la_dinode, *tl_dinode; | 1070 | struct ocfs2_dinode *la_dinode, *tl_dinode; |
900 | struct ocfs2_la_recovery_item *item, *n; | 1071 | struct ocfs2_la_recovery_item *item, *n; |
1072 | struct ocfs2_quota_recovery *qrec; | ||
901 | LIST_HEAD(tmp_la_list); | 1073 | LIST_HEAD(tmp_la_list); |
902 | 1074 | ||
903 | mlog_entry_void(); | 1075 | mlog_entry_void(); |
@@ -913,6 +1085,8 @@ void ocfs2_complete_recovery(struct work_struct *work) | |||
913 | 1085 | ||
914 | mlog(0, "Complete recovery for slot %d\n", item->lri_slot); | 1086 | mlog(0, "Complete recovery for slot %d\n", item->lri_slot); |
915 | 1087 | ||
1088 | ocfs2_wait_on_quotas(osb); | ||
1089 | |||
916 | la_dinode = item->lri_la_dinode; | 1090 | la_dinode = item->lri_la_dinode; |
917 | if (la_dinode) { | 1091 | if (la_dinode) { |
918 | mlog(0, "Clean up local alloc %llu\n", | 1092 | mlog(0, "Clean up local alloc %llu\n", |
@@ -943,6 +1117,16 @@ void ocfs2_complete_recovery(struct work_struct *work) | |||
943 | if (ret < 0) | 1117 | if (ret < 0) |
944 | mlog_errno(ret); | 1118 | mlog_errno(ret); |
945 | 1119 | ||
1120 | qrec = item->lri_qrec; | ||
1121 | if (qrec) { | ||
1122 | mlog(0, "Recovering quota files"); | ||
1123 | ret = ocfs2_finish_quota_recovery(osb, qrec, | ||
1124 | item->lri_slot); | ||
1125 | if (ret < 0) | ||
1126 | mlog_errno(ret); | ||
1127 | /* Recovery info is already freed now */ | ||
1128 | } | ||
1129 | |||
946 | kfree(item); | 1130 | kfree(item); |
947 | } | 1131 | } |
948 | 1132 | ||
@@ -956,7 +1140,8 @@ void ocfs2_complete_recovery(struct work_struct *work) | |||
956 | static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal, | 1140 | static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal, |
957 | int slot_num, | 1141 | int slot_num, |
958 | struct ocfs2_dinode *la_dinode, | 1142 | struct ocfs2_dinode *la_dinode, |
959 | struct ocfs2_dinode *tl_dinode) | 1143 | struct ocfs2_dinode *tl_dinode, |
1144 | struct ocfs2_quota_recovery *qrec) | ||
960 | { | 1145 | { |
961 | struct ocfs2_la_recovery_item *item; | 1146 | struct ocfs2_la_recovery_item *item; |
962 | 1147 | ||
@@ -971,6 +1156,9 @@ static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal, | |||
971 | if (tl_dinode) | 1156 | if (tl_dinode) |
972 | kfree(tl_dinode); | 1157 | kfree(tl_dinode); |
973 | 1158 | ||
1159 | if (qrec) | ||
1160 | ocfs2_free_quota_recovery(qrec); | ||
1161 | |||
974 | mlog_errno(-ENOMEM); | 1162 | mlog_errno(-ENOMEM); |
975 | return; | 1163 | return; |
976 | } | 1164 | } |
@@ -979,6 +1167,7 @@ static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal, | |||
979 | item->lri_la_dinode = la_dinode; | 1167 | item->lri_la_dinode = la_dinode; |
980 | item->lri_slot = slot_num; | 1168 | item->lri_slot = slot_num; |
981 | item->lri_tl_dinode = tl_dinode; | 1169 | item->lri_tl_dinode = tl_dinode; |
1170 | item->lri_qrec = qrec; | ||
982 | 1171 | ||
983 | spin_lock(&journal->j_lock); | 1172 | spin_lock(&journal->j_lock); |
984 | list_add_tail(&item->lri_list, &journal->j_la_cleanups); | 1173 | list_add_tail(&item->lri_list, &journal->j_la_cleanups); |
@@ -998,6 +1187,7 @@ void ocfs2_complete_mount_recovery(struct ocfs2_super *osb) | |||
998 | ocfs2_queue_recovery_completion(journal, | 1187 | ocfs2_queue_recovery_completion(journal, |
999 | osb->slot_num, | 1188 | osb->slot_num, |
1000 | osb->local_alloc_copy, | 1189 | osb->local_alloc_copy, |
1190 | NULL, | ||
1001 | NULL); | 1191 | NULL); |
1002 | ocfs2_schedule_truncate_log_flush(osb, 0); | 1192 | ocfs2_schedule_truncate_log_flush(osb, 0); |
1003 | 1193 | ||
@@ -1006,11 +1196,26 @@ void ocfs2_complete_mount_recovery(struct ocfs2_super *osb) | |||
1006 | } | 1196 | } |
1007 | } | 1197 | } |
1008 | 1198 | ||
1199 | void ocfs2_complete_quota_recovery(struct ocfs2_super *osb) | ||
1200 | { | ||
1201 | if (osb->quota_rec) { | ||
1202 | ocfs2_queue_recovery_completion(osb->journal, | ||
1203 | osb->slot_num, | ||
1204 | NULL, | ||
1205 | NULL, | ||
1206 | osb->quota_rec); | ||
1207 | osb->quota_rec = NULL; | ||
1208 | } | ||
1209 | } | ||
1210 | |||
1009 | static int __ocfs2_recovery_thread(void *arg) | 1211 | static int __ocfs2_recovery_thread(void *arg) |
1010 | { | 1212 | { |
1011 | int status, node_num; | 1213 | int status, node_num, slot_num; |
1012 | struct ocfs2_super *osb = arg; | 1214 | struct ocfs2_super *osb = arg; |
1013 | struct ocfs2_recovery_map *rm = osb->recovery_map; | 1215 | struct ocfs2_recovery_map *rm = osb->recovery_map; |
1216 | int *rm_quota = NULL; | ||
1217 | int rm_quota_used = 0, i; | ||
1218 | struct ocfs2_quota_recovery *qrec; | ||
1014 | 1219 | ||
1015 | mlog_entry_void(); | 1220 | mlog_entry_void(); |
1016 | 1221 | ||
@@ -1019,6 +1224,11 @@ static int __ocfs2_recovery_thread(void *arg) | |||
1019 | goto bail; | 1224 | goto bail; |
1020 | } | 1225 | } |
1021 | 1226 | ||
1227 | rm_quota = kzalloc(osb->max_slots * sizeof(int), GFP_NOFS); | ||
1228 | if (!rm_quota) { | ||
1229 | status = -ENOMEM; | ||
1230 | goto bail; | ||
1231 | } | ||
1022 | restart: | 1232 | restart: |
1023 | status = ocfs2_super_lock(osb, 1); | 1233 | status = ocfs2_super_lock(osb, 1); |
1024 | if (status < 0) { | 1234 | if (status < 0) { |
@@ -1032,8 +1242,28 @@ restart: | |||
1032 | * clear it until ocfs2_recover_node() has succeeded. */ | 1242 | * clear it until ocfs2_recover_node() has succeeded. */ |
1033 | node_num = rm->rm_entries[0]; | 1243 | node_num = rm->rm_entries[0]; |
1034 | spin_unlock(&osb->osb_lock); | 1244 | spin_unlock(&osb->osb_lock); |
1035 | 1245 | mlog(0, "checking node %d\n", node_num); | |
1036 | status = ocfs2_recover_node(osb, node_num); | 1246 | slot_num = ocfs2_node_num_to_slot(osb, node_num); |
1247 | if (slot_num == -ENOENT) { | ||
1248 | status = 0; | ||
1249 | mlog(0, "no slot for this node, so no recovery" | ||
1250 | "required.\n"); | ||
1251 | goto skip_recovery; | ||
1252 | } | ||
1253 | mlog(0, "node %d was using slot %d\n", node_num, slot_num); | ||
1254 | |||
1255 | /* It is a bit subtle with quota recovery. We cannot do it | ||
1256 | * immediately because we have to obtain cluster locks from | ||
1257 | * quota files and we also don't want to just skip it because | ||
1258 | * then quota usage would be out of sync until some node takes | ||
1259 | * the slot. So we remember which nodes need quota recovery | ||
1260 | * and when everything else is done, we recover quotas. */ | ||
1261 | for (i = 0; i < rm_quota_used && rm_quota[i] != slot_num; i++); | ||
1262 | if (i == rm_quota_used) | ||
1263 | rm_quota[rm_quota_used++] = slot_num; | ||
1264 | |||
1265 | status = ocfs2_recover_node(osb, node_num, slot_num); | ||
1266 | skip_recovery: | ||
1037 | if (!status) { | 1267 | if (!status) { |
1038 | ocfs2_recovery_map_clear(osb, node_num); | 1268 | ocfs2_recovery_map_clear(osb, node_num); |
1039 | } else { | 1269 | } else { |
@@ -1055,13 +1285,27 @@ restart: | |||
1055 | if (status < 0) | 1285 | if (status < 0) |
1056 | mlog_errno(status); | 1286 | mlog_errno(status); |
1057 | 1287 | ||
1288 | /* Now it is the right time to recover quotas... We have to do this under | ||
1289 | * superblock lock so that no one can start using the slot (and crash) | ||
1290 | * before we recover it */ | ||
1291 | for (i = 0; i < rm_quota_used; i++) { | ||
1292 | qrec = ocfs2_begin_quota_recovery(osb, rm_quota[i]); | ||
1293 | if (IS_ERR(qrec)) { | ||
1294 | status = PTR_ERR(qrec); | ||
1295 | mlog_errno(status); | ||
1296 | continue; | ||
1297 | } | ||
1298 | ocfs2_queue_recovery_completion(osb->journal, rm_quota[i], | ||
1299 | NULL, NULL, qrec); | ||
1300 | } | ||
1301 | |||
1058 | ocfs2_super_unlock(osb, 1); | 1302 | ocfs2_super_unlock(osb, 1); |
1059 | 1303 | ||
1060 | /* We always run recovery on our own orphan dir - the dead | 1304 | /* We always run recovery on our own orphan dir - the dead |
1061 | * node(s) may have disallowed a previous inode delete. Re-processing | 1305 | * node(s) may have disallowed a previous inode delete. Re-processing |
1062 | * is therefore required. */ | 1306 | * is therefore required. */ |
1063 | ocfs2_queue_recovery_completion(osb->journal, osb->slot_num, NULL, | 1307 | ocfs2_queue_recovery_completion(osb->journal, osb->slot_num, NULL, |
1064 | NULL); | 1308 | NULL, NULL); |
1065 | 1309 | ||
1066 | bail: | 1310 | bail: |
1067 | mutex_lock(&osb->recovery_lock); | 1311 | mutex_lock(&osb->recovery_lock); |
@@ -1076,6 +1320,9 @@ bail: | |||
1076 | 1320 | ||
1077 | mutex_unlock(&osb->recovery_lock); | 1321 | mutex_unlock(&osb->recovery_lock); |
1078 | 1322 | ||
1323 | if (rm_quota) | ||
1324 | kfree(rm_quota); | ||
1325 | |||
1079 | mlog_exit(status); | 1326 | mlog_exit(status); |
1080 | /* no one is calling kthread_stop() for us so the kthread() api | 1327 | /* no one is calling kthread_stop() for us so the kthread() api |
1081 | * requires that we call do_exit(). And it isn't exported, but | 1328 | * requires that we call do_exit(). And it isn't exported, but |
@@ -1135,8 +1382,7 @@ static int ocfs2_read_journal_inode(struct ocfs2_super *osb, | |||
1135 | } | 1382 | } |
1136 | SET_INODE_JOURNAL(inode); | 1383 | SET_INODE_JOURNAL(inode); |
1137 | 1384 | ||
1138 | status = ocfs2_read_blocks(inode, OCFS2_I(inode)->ip_blkno, 1, bh, | 1385 | status = ocfs2_read_inode_block_full(inode, bh, OCFS2_BH_IGNORE_CACHE); |
1139 | OCFS2_BH_IGNORE_CACHE); | ||
1140 | if (status < 0) { | 1386 | if (status < 0) { |
1141 | mlog_errno(status); | 1387 | mlog_errno(status); |
1142 | goto bail; | 1388 | goto bail; |
@@ -1268,6 +1514,7 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb, | |||
1268 | osb->slot_recovery_generations[slot_num] = | 1514 | osb->slot_recovery_generations[slot_num] = |
1269 | ocfs2_get_recovery_generation(fe); | 1515 | ocfs2_get_recovery_generation(fe); |
1270 | 1516 | ||
1517 | ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &fe->i_check); | ||
1271 | status = ocfs2_write_block(osb, bh, inode); | 1518 | status = ocfs2_write_block(osb, bh, inode); |
1272 | if (status < 0) | 1519 | if (status < 0) |
1273 | mlog_errno(status); | 1520 | mlog_errno(status); |
@@ -1304,31 +1551,19 @@ done: | |||
1304 | * far less concerning. | 1551 | * far less concerning. |
1305 | */ | 1552 | */ |
1306 | static int ocfs2_recover_node(struct ocfs2_super *osb, | 1553 | static int ocfs2_recover_node(struct ocfs2_super *osb, |
1307 | int node_num) | 1554 | int node_num, int slot_num) |
1308 | { | 1555 | { |
1309 | int status = 0; | 1556 | int status = 0; |
1310 | int slot_num; | ||
1311 | struct ocfs2_dinode *la_copy = NULL; | 1557 | struct ocfs2_dinode *la_copy = NULL; |
1312 | struct ocfs2_dinode *tl_copy = NULL; | 1558 | struct ocfs2_dinode *tl_copy = NULL; |
1313 | 1559 | ||
1314 | mlog_entry("(node_num=%d, osb->node_num = %d)\n", | 1560 | mlog_entry("(node_num=%d, slot_num=%d, osb->node_num = %d)\n", |
1315 | node_num, osb->node_num); | 1561 | node_num, slot_num, osb->node_num); |
1316 | |||
1317 | mlog(0, "checking node %d\n", node_num); | ||
1318 | 1562 | ||
1319 | /* Should not ever be called to recover ourselves -- in that | 1563 | /* Should not ever be called to recover ourselves -- in that |
1320 | * case we should've called ocfs2_journal_load instead. */ | 1564 | * case we should've called ocfs2_journal_load instead. */ |
1321 | BUG_ON(osb->node_num == node_num); | 1565 | BUG_ON(osb->node_num == node_num); |
1322 | 1566 | ||
1323 | slot_num = ocfs2_node_num_to_slot(osb, node_num); | ||
1324 | if (slot_num == -ENOENT) { | ||
1325 | status = 0; | ||
1326 | mlog(0, "no slot for this node, so no recovery required.\n"); | ||
1327 | goto done; | ||
1328 | } | ||
1329 | |||
1330 | mlog(0, "node %d was using slot %d\n", node_num, slot_num); | ||
1331 | |||
1332 | status = ocfs2_replay_journal(osb, node_num, slot_num); | 1567 | status = ocfs2_replay_journal(osb, node_num, slot_num); |
1333 | if (status < 0) { | 1568 | if (status < 0) { |
1334 | if (status == -EBUSY) { | 1569 | if (status == -EBUSY) { |
@@ -1364,7 +1599,7 @@ static int ocfs2_recover_node(struct ocfs2_super *osb, | |||
1364 | 1599 | ||
1365 | /* This will kfree the memory pointed to by la_copy and tl_copy */ | 1600 | /* This will kfree the memory pointed to by la_copy and tl_copy */ |
1366 | ocfs2_queue_recovery_completion(osb->journal, slot_num, la_copy, | 1601 | ocfs2_queue_recovery_completion(osb->journal, slot_num, la_copy, |
1367 | tl_copy); | 1602 | tl_copy, NULL); |
1368 | 1603 | ||
1369 | status = 0; | 1604 | status = 0; |
1370 | done: | 1605 | done: |
@@ -1659,13 +1894,14 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb, | |||
1659 | return ret; | 1894 | return ret; |
1660 | } | 1895 | } |
1661 | 1896 | ||
1662 | static int ocfs2_wait_on_mount(struct ocfs2_super *osb) | 1897 | static int __ocfs2_wait_on_mount(struct ocfs2_super *osb, int quota) |
1663 | { | 1898 | { |
1664 | /* This check is good because ocfs2 will wait on our recovery | 1899 | /* This check is good because ocfs2 will wait on our recovery |
1665 | * thread before changing it to something other than MOUNTED | 1900 | * thread before changing it to something other than MOUNTED |
1666 | * or DISABLED. */ | 1901 | * or DISABLED. */ |
1667 | wait_event(osb->osb_mount_event, | 1902 | wait_event(osb->osb_mount_event, |
1668 | atomic_read(&osb->vol_state) == VOLUME_MOUNTED || | 1903 | (!quota && atomic_read(&osb->vol_state) == VOLUME_MOUNTED) || |
1904 | atomic_read(&osb->vol_state) == VOLUME_MOUNTED_QUOTAS || | ||
1669 | atomic_read(&osb->vol_state) == VOLUME_DISABLED); | 1905 | atomic_read(&osb->vol_state) == VOLUME_DISABLED); |
1670 | 1906 | ||
1671 | /* If there's an error on mount, then we may never get to the | 1907 | /* If there's an error on mount, then we may never get to the |