author		Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>	2009-04-06 22:01:38 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2009-04-07 11:31:16 -0400
commit		0f3e1c7f23f8a6f8224fa1d275381f6d9279ad4b (patch)
tree		b46181f8caa65d3b829acadd644b44afd465cd63
parent		f30bf3e40f80ef50c17f55271deae3abc03e793e (diff)
nilfs2: recovery functions
This adds the recovery function run at mount time. Usually, recovery is achieved simply by finding the latest super root. When logs without checkpoints were appended by data-sync operations after the latest super root, the recovery function performs roll-forward and reconstructs new log(s) with a super root.

Signed-off-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
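[Editor's note] In outline, the mount-time decision described above amounts to the following. This is only an illustrative, standalone sketch; the stub names (find_latest_super_root, newer_dsync_logs_exist, roll_forward_dsync_logs) are hypothetical and are not functions from this patch, whose real entry points are nilfs_search_super_root() and nilfs_recover_logical_segments() in the diff below.

#include <stdio.h>
#include <stdbool.h>

/* Hypothetical stubs standing in for the real NILFS routines. */
static bool find_latest_super_root(void)  { return true; }
static bool newer_dsync_logs_exist(void)  { return true; }

static void roll_forward_dsync_logs(void)
{
	/* Salvage the data blocks and write new log(s) carrying a super root. */
	puts("roll forward: salvage data blocks, write a new log with a super root");
}

int main(void)
{
	/* Step 1: locate the latest super root written by the segment constructor. */
	if (!find_latest_super_root()) {
		puts("no valid super root found: recovery fails");
		return 1;
	}

	/* Step 2: if data-sync logs were appended after that super root,
	 * roll them forward; otherwise there is nothing to recover. */
	if (newer_dsync_logs_exist())
		roll_forward_dsync_logs();	/* the case this patch handles */
	else
		puts("latest super root is current: nothing to roll forward");
	return 0;
}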
-rw-r--r--	fs/nilfs2/recovery.c	941
1 file changed, 941 insertions, 0 deletions
diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c
new file mode 100644
index 000000000000..877dc1ba23f3
--- /dev/null
+++ b/fs/nilfs2/recovery.c
@@ -0,0 +1,941 @@
1/*
2 * recovery.c - NILFS recovery logic
3 *
4 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 *
20 * Written by Ryusuke Konishi <ryusuke@osrg.net>
21 */
22
23#include <linux/buffer_head.h>
24#include <linux/blkdev.h>
25#include <linux/swap.h>
26#include <linux/crc32.h>
27#include "nilfs.h"
28#include "segment.h"
29#include "sufile.h"
30#include "page.h"
31#include "seglist.h"
32#include "segbuf.h"
33
34/*
35 * Segment check result
36 */
37enum {
38 NILFS_SEG_VALID,
39 NILFS_SEG_NO_SUPER_ROOT,
40 NILFS_SEG_FAIL_IO,
41 NILFS_SEG_FAIL_MAGIC,
42 NILFS_SEG_FAIL_SEQ,
43 NILFS_SEG_FAIL_CHECKSUM_SEGSUM,
44 NILFS_SEG_FAIL_CHECKSUM_SUPER_ROOT,
45 NILFS_SEG_FAIL_CHECKSUM_FULL,
46 NILFS_SEG_FAIL_CONSISTENCY,
47};
48
49/* work structure for recovery */
50struct nilfs_recovery_block {
51 ino_t ino; /* Inode number of the file that this block
52 belongs to */
53 sector_t blocknr; /* block number */
54 __u64 vblocknr; /* virtual block number */
55 unsigned long blkoff; /* File offset of the data block (per block) */
56 struct list_head list;
57};
58
59
60static int nilfs_warn_segment_error(int err)
61{
62 switch (err) {
63 case NILFS_SEG_FAIL_IO:
64 printk(KERN_WARNING
65 "NILFS warning: I/O error on loading last segment\n");
66 return -EIO;
67 case NILFS_SEG_FAIL_MAGIC:
68 printk(KERN_WARNING
69 "NILFS warning: Segment magic number invalid\n");
70 break;
71 case NILFS_SEG_FAIL_SEQ:
72 printk(KERN_WARNING
73 "NILFS warning: Sequence number mismatch\n");
74 break;
75 case NILFS_SEG_FAIL_CHECKSUM_SEGSUM:
76 printk(KERN_WARNING
77 "NILFS warning: Checksum error in segment summary\n");
78 break;
79 case NILFS_SEG_FAIL_CHECKSUM_SUPER_ROOT:
80 printk(KERN_WARNING
81 "NILFS warning: Checksum error in super root\n");
82 break;
83 case NILFS_SEG_FAIL_CHECKSUM_FULL:
84 printk(KERN_WARNING
85 "NILFS warning: Checksum error in segment payload\n");
86 break;
87 case NILFS_SEG_FAIL_CONSISTENCY:
88 printk(KERN_WARNING
89 "NILFS warning: Inconsistent segment\n");
90 break;
91 case NILFS_SEG_NO_SUPER_ROOT:
92 printk(KERN_WARNING
93 "NILFS warning: No super root in the last segment\n");
94 break;
95 case NILFS_SEG_VALID:
96 default:
97 BUG();
98 }
99 return -EINVAL;
100}
101
102static void store_segsum_info(struct nilfs_segsum_info *ssi,
103 struct nilfs_segment_summary *sum,
104 unsigned int blocksize)
105{
106 ssi->flags = le16_to_cpu(sum->ss_flags);
107 ssi->seg_seq = le64_to_cpu(sum->ss_seq);
108 ssi->ctime = le64_to_cpu(sum->ss_create);
109 ssi->next = le64_to_cpu(sum->ss_next);
110 ssi->nblocks = le32_to_cpu(sum->ss_nblocks);
111 ssi->nfinfo = le32_to_cpu(sum->ss_nfinfo);
112 ssi->sumbytes = le32_to_cpu(sum->ss_sumbytes);
113
114 ssi->nsumblk = DIV_ROUND_UP(ssi->sumbytes, blocksize);
115 ssi->nfileblk = ssi->nblocks - ssi->nsumblk - !!NILFS_SEG_HAS_SR(ssi);
116}
117
118/**
119 * calc_crc_cont - check CRC of blocks continuously
120 * @sbi: nilfs_sb_info
121 * @bhs: buffer head of start block
122 * @sum: place to store result
123 * @offset: offset bytes in the first block
124 * @check_bytes: number of bytes to be checked
125 * @start: DBN of start block
126 * @nblock: number of blocks to be checked
127 */
128static int calc_crc_cont(struct nilfs_sb_info *sbi, struct buffer_head *bhs,
129 u32 *sum, unsigned long offset, u64 check_bytes,
130 sector_t start, unsigned long nblock)
131{
132 unsigned long blocksize = sbi->s_super->s_blocksize;
133 unsigned long size;
134 u32 crc;
135
136 BUG_ON(offset >= blocksize);
137 check_bytes -= offset;
138 size = min_t(u64, check_bytes, blocksize - offset);
139 crc = crc32_le(sbi->s_nilfs->ns_crc_seed,
140 (unsigned char *)bhs->b_data + offset, size);
141 if (--nblock > 0) {
142 do {
143 struct buffer_head *bh
144 = sb_bread(sbi->s_super, ++start);
145 if (!bh)
146 return -EIO;
147 check_bytes -= size;
148 size = min_t(u64, check_bytes, blocksize);
149 crc = crc32_le(crc, bh->b_data, size);
150 brelse(bh);
151 } while (--nblock > 0);
152 }
153 *sum = crc;
154 return 0;
155}
156
157/**
158 * nilfs_read_super_root_block - read super root block
159 * @sb: super_block
160 * @sr_block: disk block number of the super root block
161 * @pbh: address of a buffer_head pointer to return super root buffer
162 * @check: CRC check flag
163 */
164int nilfs_read_super_root_block(struct super_block *sb, sector_t sr_block,
165 struct buffer_head **pbh, int check)
166{
167 struct buffer_head *bh_sr;
168 struct nilfs_super_root *sr;
169 u32 crc;
170 int ret;
171
172 *pbh = NULL;
173 bh_sr = sb_bread(sb, sr_block);
174 if (unlikely(!bh_sr)) {
175 ret = NILFS_SEG_FAIL_IO;
176 goto failed;
177 }
178
179 sr = (struct nilfs_super_root *)bh_sr->b_data;
180 if (check) {
181 unsigned bytes = le16_to_cpu(sr->sr_bytes);
182
183 if (bytes == 0 || bytes > sb->s_blocksize) {
184 ret = NILFS_SEG_FAIL_CHECKSUM_SUPER_ROOT;
185 goto failed_bh;
186 }
187 if (calc_crc_cont(NILFS_SB(sb), bh_sr, &crc,
188 sizeof(sr->sr_sum), bytes, sr_block, 1)) {
189 ret = NILFS_SEG_FAIL_IO;
190 goto failed_bh;
191 }
192 if (crc != le32_to_cpu(sr->sr_sum)) {
193 ret = NILFS_SEG_FAIL_CHECKSUM_SUPER_ROOT;
194 goto failed_bh;
195 }
196 }
197 *pbh = bh_sr;
198 return 0;
199
200 failed_bh:
201 brelse(bh_sr);
202
203 failed:
204 return nilfs_warn_segment_error(ret);
205}
206
207/**
208 * load_segment_summary - read segment summary of the specified partial segment
209 * @sbi: nilfs_sb_info
210 * @pseg_start: start disk block number of partial segment
211 * @seg_seq: sequence number requested
212 * @ssi: pointer to nilfs_segsum_info struct to store information
213 * @full_check: full check flag
214 * (0: only checks segment summary CRC, 1: data CRC)
215 */
216static int
217load_segment_summary(struct nilfs_sb_info *sbi, sector_t pseg_start,
218 u64 seg_seq, struct nilfs_segsum_info *ssi,
219 int full_check)
220{
221 struct buffer_head *bh_sum;
222 struct nilfs_segment_summary *sum;
223 unsigned long offset, nblock;
224 u64 check_bytes;
225 u32 crc, crc_sum;
226 int ret = NILFS_SEG_FAIL_IO;
227
228 bh_sum = sb_bread(sbi->s_super, pseg_start);
229 if (!bh_sum)
230 goto out;
231
232 sum = (struct nilfs_segment_summary *)bh_sum->b_data;
233
234 /* Check consistency of segment summary */
235 if (le32_to_cpu(sum->ss_magic) != NILFS_SEGSUM_MAGIC) {
236 ret = NILFS_SEG_FAIL_MAGIC;
237 goto failed;
238 }
239 store_segsum_info(ssi, sum, sbi->s_super->s_blocksize);
240 if (seg_seq != ssi->seg_seq) {
241 ret = NILFS_SEG_FAIL_SEQ;
242 goto failed;
243 }
244 if (full_check) {
245 offset = sizeof(sum->ss_datasum);
246 check_bytes =
247 ((u64)ssi->nblocks << sbi->s_super->s_blocksize_bits);
248 nblock = ssi->nblocks;
249 crc_sum = le32_to_cpu(sum->ss_datasum);
250 ret = NILFS_SEG_FAIL_CHECKSUM_FULL;
251 } else { /* only checks segment summary */
252 offset = sizeof(sum->ss_datasum) + sizeof(sum->ss_sumsum);
253 check_bytes = ssi->sumbytes;
254 nblock = ssi->nsumblk;
255 crc_sum = le32_to_cpu(sum->ss_sumsum);
256 ret = NILFS_SEG_FAIL_CHECKSUM_SEGSUM;
257 }
258
259 if (unlikely(nblock == 0 ||
260 nblock > sbi->s_nilfs->ns_blocks_per_segment)) {
261 /* This limits the number of blocks read in the CRC check */
262 ret = NILFS_SEG_FAIL_CONSISTENCY;
263 goto failed;
264 }
265 if (calc_crc_cont(sbi, bh_sum, &crc, offset, check_bytes,
266 pseg_start, nblock)) {
267 ret = NILFS_SEG_FAIL_IO;
268 goto failed;
269 }
270 if (crc == crc_sum)
271 ret = 0;
272 failed:
273 brelse(bh_sum);
274 out:
275 return ret;
276}
277
278static void *segsum_get(struct super_block *sb, struct buffer_head **pbh,
279 unsigned int *offset, unsigned int bytes)
280{
281 void *ptr;
282 sector_t blocknr;
283
284 BUG_ON((*pbh)->b_size < *offset);
285 if (bytes > (*pbh)->b_size - *offset) {
286 blocknr = (*pbh)->b_blocknr;
287 brelse(*pbh);
288 *pbh = sb_bread(sb, blocknr + 1);
289 if (unlikely(!*pbh))
290 return NULL;
291 *offset = 0;
292 }
293 ptr = (*pbh)->b_data + *offset;
294 *offset += bytes;
295 return ptr;
296}
297
298static void segsum_skip(struct super_block *sb, struct buffer_head **pbh,
299 unsigned int *offset, unsigned int bytes,
300 unsigned long count)
301{
302 unsigned int rest_item_in_current_block
303 = ((*pbh)->b_size - *offset) / bytes;
304
305 if (count <= rest_item_in_current_block) {
306 *offset += bytes * count;
307 } else {
308 sector_t blocknr = (*pbh)->b_blocknr;
309 unsigned int nitem_per_block = (*pbh)->b_size / bytes;
310 unsigned int bcnt;
311
312 count -= rest_item_in_current_block;
313 bcnt = DIV_ROUND_UP(count, nitem_per_block);
314 *offset = bytes * (count - (bcnt - 1) * nitem_per_block);
315
316 brelse(*pbh);
317 *pbh = sb_bread(sb, blocknr + bcnt);
318 }
319}
320
321static int
322collect_blocks_from_segsum(struct nilfs_sb_info *sbi, sector_t sum_blocknr,
323 struct nilfs_segsum_info *ssi,
324 struct list_head *head)
325{
326 struct buffer_head *bh;
327 unsigned int offset;
328 unsigned long nfinfo = ssi->nfinfo;
329 sector_t blocknr = sum_blocknr + ssi->nsumblk;
330 ino_t ino;
331 int err = -EIO;
332
333 if (!nfinfo)
334 return 0;
335
336 bh = sb_bread(sbi->s_super, sum_blocknr);
337 if (unlikely(!bh))
338 goto out;
339
340 offset = le16_to_cpu(
341 ((struct nilfs_segment_summary *)bh->b_data)->ss_bytes);
342 for (;;) {
343 unsigned long nblocks, ndatablk, nnodeblk;
344 struct nilfs_finfo *finfo;
345
346 finfo = segsum_get(sbi->s_super, &bh, &offset, sizeof(*finfo));
347 if (unlikely(!finfo))
348 goto out;
349
350 ino = le64_to_cpu(finfo->fi_ino);
351 nblocks = le32_to_cpu(finfo->fi_nblocks);
352 ndatablk = le32_to_cpu(finfo->fi_ndatablk);
353 nnodeblk = nblocks - ndatablk;
354
355 while (ndatablk-- > 0) {
356 struct nilfs_recovery_block *rb;
357 struct nilfs_binfo_v *binfo;
358
359 binfo = segsum_get(sbi->s_super, &bh, &offset,
360 sizeof(*binfo));
361 if (unlikely(!binfo))
362 goto out;
363
364 rb = kmalloc(sizeof(*rb), GFP_NOFS);
365 if (unlikely(!rb)) {
366 err = -ENOMEM;
367 goto out;
368 }
369 rb->ino = ino;
370 rb->blocknr = blocknr++;
371 rb->vblocknr = le64_to_cpu(binfo->bi_vblocknr);
372 rb->blkoff = le64_to_cpu(binfo->bi_blkoff);
373 /* INIT_LIST_HEAD(&rb->list); */
374 list_add_tail(&rb->list, head);
375 }
376 if (--nfinfo == 0)
377 break;
378 blocknr += nnodeblk; /* always 0 for the data sync segments */
379 segsum_skip(sbi->s_super, &bh, &offset, sizeof(__le64),
380 nnodeblk);
381 if (unlikely(!bh))
382 goto out;
383 }
384 err = 0;
385 out:
386 brelse(bh); /* brelse(NULL) is just ignored */
387 return err;
388}
389
390static void dispose_recovery_list(struct list_head *head)
391{
392 while (!list_empty(head)) {
393 struct nilfs_recovery_block *rb
394 = list_entry(head->next,
395 struct nilfs_recovery_block, list);
396 list_del(&rb->list);
397 kfree(rb);
398 }
399}
400
401void nilfs_dispose_segment_list(struct list_head *head)
402{
403 while (!list_empty(head)) {
404 struct nilfs_segment_entry *ent
405 = list_entry(head->next,
406 struct nilfs_segment_entry, list);
407 list_del(&ent->list);
408 nilfs_free_segment_entry(ent);
409 }
410}
411
412static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs,
413 struct nilfs_recovery_info *ri)
414{
415 struct list_head *head = &ri->ri_used_segments;
416 struct nilfs_segment_entry *ent, *n;
417 struct inode *sufile = nilfs->ns_sufile;
418 __u64 segnum[4];
419 int err;
420 int i;
421
422 segnum[0] = nilfs->ns_segnum;
423 segnum[1] = nilfs->ns_nextnum;
424 segnum[2] = ri->ri_segnum;
425 segnum[3] = ri->ri_nextnum;
426
427 /*
428 * Releasing the next segment of the latest super root.
429 * The next segment is invalidated by this recovery.
430 */
431 err = nilfs_sufile_free(sufile, segnum[1]);
432 if (unlikely(err))
433 goto failed;
434
435 err = -ENOMEM;
436 for (i = 1; i < 4; i++) {
437 ent = nilfs_alloc_segment_entry(segnum[i]);
438 if (unlikely(!ent))
439 goto failed;
440 list_add_tail(&ent->list, head);
441 }
442
443 /*
444 * Collecting segments written after the latest super root.
445 * These are marked volatile active, and won't be reallocated in
446 * the next construction.
447 */
448 list_for_each_entry_safe(ent, n, head, list) {
449 if (ent->segnum == segnum[0]) {
450 list_del(&ent->list);
451 nilfs_free_segment_entry(ent);
452 continue;
453 }
454 err = nilfs_open_segment_entry(ent, sufile);
455 if (unlikely(err))
456 goto failed;
457 if (nilfs_segment_usage_clean(ent->raw_su)) {
458 nilfs_segment_usage_set_volatile_active(ent->raw_su);
459 /* Keep it open */
460 } else {
461 /* Removing duplicated entries */
462 list_del(&ent->list);
463 nilfs_close_segment_entry(ent, sufile);
464 nilfs_free_segment_entry(ent);
465 }
466 }
467 list_splice_init(head, nilfs->ns_used_segments.prev);
468
469 /*
470 * The segment having the latest super root is active, and
471 * should be deactivated on the next construction for recovery.
472 */
473 err = -ENOMEM;
474 ent = nilfs_alloc_segment_entry(segnum[0]);
475 if (unlikely(!ent))
476 goto failed;
477 list_add_tail(&ent->list, &ri->ri_used_segments);
478
479 /* Allocate new segments for recovery */
480 err = nilfs_sufile_alloc(sufile, &segnum[0]);
481 if (unlikely(err))
482 goto failed;
483
484 nilfs->ns_pseg_offset = 0;
485 nilfs->ns_seg_seq = ri->ri_seq + 2;
486 nilfs->ns_nextnum = nilfs->ns_segnum = segnum[0];
487 return 0;
488
489 failed:
490 /* No need to recover sufile because it will be destroyed on error */
491 return err;
492}
493
494static int nilfs_recovery_copy_block(struct nilfs_sb_info *sbi,
495 struct nilfs_recovery_block *rb,
496 struct page *page)
497{
498 struct buffer_head *bh_org;
499 void *kaddr;
500
501 bh_org = sb_bread(sbi->s_super, rb->blocknr);
502 if (unlikely(!bh_org))
503 return -EIO;
504
505 kaddr = kmap_atomic(page, KM_USER0);
506 memcpy(kaddr + bh_offset(bh_org), bh_org->b_data, bh_org->b_size);
507 kunmap_atomic(kaddr, KM_USER0);
508 brelse(bh_org);
509 return 0;
510}
511
512static int recover_dsync_blocks(struct nilfs_sb_info *sbi,
513 struct list_head *head,
514 unsigned long *nr_salvaged_blocks)
515{
516 struct inode *inode;
517 struct nilfs_recovery_block *rb, *n;
518 unsigned blocksize = sbi->s_super->s_blocksize;
519 struct page *page;
520 loff_t pos;
521 int err = 0, err2 = 0;
522
523 list_for_each_entry_safe(rb, n, head, list) {
524 inode = nilfs_iget(sbi->s_super, rb->ino);
525 if (IS_ERR(inode)) {
526 err = PTR_ERR(inode);
527 inode = NULL;
528 goto failed_inode;
529 }
530
531 pos = rb->blkoff << inode->i_blkbits;
532 page = NULL;
533 err = block_write_begin(NULL, inode->i_mapping, pos, blocksize,
534 0, &page, NULL, nilfs_get_block);
535 if (unlikely(err))
536 goto failed_inode;
537
538 err = nilfs_recovery_copy_block(sbi, rb, page);
539 if (unlikely(err))
540 goto failed_page;
541
542 err = nilfs_set_file_dirty(sbi, inode, 1);
543 if (unlikely(err))
544 goto failed_page;
545
546 block_write_end(NULL, inode->i_mapping, pos, blocksize,
547 blocksize, page, NULL);
548
549 unlock_page(page);
550 page_cache_release(page);
551
552 (*nr_salvaged_blocks)++;
553 goto next;
554
555 failed_page:
556 unlock_page(page);
557 page_cache_release(page);
558
559 failed_inode:
560 printk(KERN_WARNING
561 "NILFS warning: error recovering data block "
562 "(err=%d, ino=%lu, block-offset=%llu)\n",
563 err, rb->ino, (unsigned long long)rb->blkoff);
564 if (!err2)
565 err2 = err;
566 next:
567 iput(inode); /* iput(NULL) is just ignored */
568 list_del_init(&rb->list);
569 kfree(rb);
570 }
571 return err2;
572}
573
574/**
575 * nilfs_do_roll_forward - salvage logical segments newer than the latest
576 * checkpoint
577 * @sbi: nilfs_sb_info
578 * @nilfs: the_nilfs
579 * @ri: pointer to a nilfs_recovery_info
580 */
581static int nilfs_do_roll_forward(struct the_nilfs *nilfs,
582 struct nilfs_sb_info *sbi,
583 struct nilfs_recovery_info *ri)
584{
585 struct nilfs_segsum_info ssi;
586 sector_t pseg_start;
587 sector_t seg_start, seg_end; /* Starting/ending DBN of full segment */
588 unsigned long nsalvaged_blocks = 0;
589 u64 seg_seq;
590 __u64 segnum, nextnum = 0;
591 int empty_seg = 0;
592 int err = 0, ret;
593 LIST_HEAD(dsync_blocks); /* list of data blocks to be recovered */
594 enum {
595 RF_INIT_ST,
596 RF_DSYNC_ST, /* scanning data-sync segments */
597 };
598 int state = RF_INIT_ST;
599
600 nilfs_attach_writer(nilfs, sbi);
601 pseg_start = ri->ri_lsegs_start;
602 seg_seq = ri->ri_lsegs_start_seq;
603 segnum = nilfs_get_segnum_of_block(nilfs, pseg_start);
604 nilfs_get_segment_range(nilfs, segnum, &seg_start, &seg_end);
605
606 while (segnum != ri->ri_segnum || pseg_start <= ri->ri_pseg_start) {
607
608 ret = load_segment_summary(sbi, pseg_start, seg_seq, &ssi, 1);
609 if (ret) {
610 if (ret == NILFS_SEG_FAIL_IO) {
611 err = -EIO;
612 goto failed;
613 }
614 goto strayed;
615 }
616 if (unlikely(NILFS_SEG_HAS_SR(&ssi)))
617 goto confused;
618
619 /* Found a valid partial segment; do recovery actions */
620 nextnum = nilfs_get_segnum_of_block(nilfs, ssi.next);
621 empty_seg = 0;
622 nilfs->ns_ctime = ssi.ctime;
623 if (!(ssi.flags & NILFS_SS_GC))
624 nilfs->ns_nongc_ctime = ssi.ctime;
625
626 switch (state) {
627 case RF_INIT_ST:
628 if (!NILFS_SEG_LOGBGN(&ssi) || !NILFS_SEG_DSYNC(&ssi))
629 goto try_next_pseg;
630 state = RF_DSYNC_ST;
631 /* Fall through */
632 case RF_DSYNC_ST:
633 if (!NILFS_SEG_DSYNC(&ssi))
634 goto confused;
635
636 err = collect_blocks_from_segsum(
637 sbi, pseg_start, &ssi, &dsync_blocks);
638 if (unlikely(err))
639 goto failed;
640 if (NILFS_SEG_LOGEND(&ssi)) {
641 err = recover_dsync_blocks(
642 sbi, &dsync_blocks, &nsalvaged_blocks);
643 if (unlikely(err))
644 goto failed;
645 state = RF_INIT_ST;
646 }
647 break; /* Fall through to try_next_pseg */
648 }
649
650 try_next_pseg:
651 if (pseg_start == ri->ri_lsegs_end)
652 break;
653 pseg_start += ssi.nblocks;
654 if (pseg_start < seg_end)
655 continue;
656 goto feed_segment;
657
658 strayed:
659 if (pseg_start == ri->ri_lsegs_end)
660 break;
661
662 feed_segment:
663 /* Looking to the next full segment */
664 if (empty_seg++)
665 break;
666 seg_seq++;
667 segnum = nextnum;
668 nilfs_get_segment_range(nilfs, segnum, &seg_start, &seg_end);
669 pseg_start = seg_start;
670 }
671
672 if (nsalvaged_blocks) {
673 printk(KERN_INFO "NILFS (device %s): salvaged %lu blocks\n",
674 sbi->s_super->s_id, nsalvaged_blocks);
675 ri->ri_need_recovery = NILFS_RECOVERY_ROLLFORWARD_DONE;
676 }
677 out:
678 dispose_recovery_list(&dsync_blocks);
679 nilfs_detach_writer(sbi->s_nilfs, sbi);
680 return err;
681
682 confused:
683 err = -EINVAL;
684 failed:
685 printk(KERN_ERR
686 "NILFS (device %s): Error roll-forwarding "
687 "(err=%d, pseg block=%llu). ",
688 sbi->s_super->s_id, err, (unsigned long long)pseg_start);
689 goto out;
690}
691
692static void nilfs_finish_roll_forward(struct the_nilfs *nilfs,
693 struct nilfs_sb_info *sbi,
694 struct nilfs_recovery_info *ri)
695{
696 struct buffer_head *bh;
697 int err;
698
699 if (nilfs_get_segnum_of_block(nilfs, ri->ri_lsegs_start) !=
700 nilfs_get_segnum_of_block(nilfs, ri->ri_super_root))
701 return;
702
703 bh = sb_getblk(sbi->s_super, ri->ri_lsegs_start);
704 BUG_ON(!bh);
705 memset(bh->b_data, 0, bh->b_size);
706 set_buffer_dirty(bh);
707 err = sync_dirty_buffer(bh);
708 if (unlikely(err))
709 printk(KERN_WARNING
710 "NILFS warning: buffer sync write failed during "
711 "post-cleaning of recovery.\n");
712 brelse(bh);
713}
714
715/**
716 * nilfs_recover_logical_segments - salvage logical segments written after
717 * the latest super root
718 * @nilfs: the_nilfs
719 * @sbi: nilfs_sb_info
720 * @ri: pointer to a nilfs_recovery_info struct to store search results.
721 *
722 * Return Value: On success, 0 is returned. On error, one of the following
723 * negative error code is returned.
724 *
725 * %-EINVAL - Inconsistent filesystem state.
726 *
727 * %-EIO - I/O error
728 *
729 * %-ENOSPC - No space left on device (only in a panic state).
730 *
731 * %-ERESTARTSYS - Interrupted.
732 *
733 * %-ENOMEM - Insufficient memory available.
734 */
735int nilfs_recover_logical_segments(struct the_nilfs *nilfs,
736 struct nilfs_sb_info *sbi,
737 struct nilfs_recovery_info *ri)
738{
739 int err;
740
741 if (ri->ri_lsegs_start == 0 || ri->ri_lsegs_end == 0)
742 return 0;
743
744 err = nilfs_attach_checkpoint(sbi, ri->ri_cno);
745 if (unlikely(err)) {
746 printk(KERN_ERR
747 "NILFS: error loading the latest checkpoint.\n");
748 return err;
749 }
750
751 err = nilfs_do_roll_forward(nilfs, sbi, ri);
752 if (unlikely(err))
753 goto failed;
754
755 if (ri->ri_need_recovery == NILFS_RECOVERY_ROLLFORWARD_DONE) {
756 err = nilfs_prepare_segment_for_recovery(nilfs, ri);
757 if (unlikely(err)) {
758 printk(KERN_ERR "NILFS: Error preparing segments for "
759 "recovery.\n");
760 goto failed;
761 }
762
763 err = nilfs_attach_segment_constructor(sbi, ri);
764 if (unlikely(err))
765 goto failed;
766
767 set_nilfs_discontinued(nilfs);
768 err = nilfs_construct_segment(sbi->s_super);
769 nilfs_detach_segment_constructor(sbi);
770
771 if (unlikely(err)) {
772 printk(KERN_ERR "NILFS: Oops! recovery failed. "
773 "(err=%d)\n", err);
774 goto failed;
775 }
776
777 nilfs_finish_roll_forward(nilfs, sbi, ri);
778 }
779
780 nilfs_detach_checkpoint(sbi);
781 return 0;
782
783 failed:
784 nilfs_detach_checkpoint(sbi);
785 nilfs_mdt_clear(nilfs->ns_cpfile);
786 nilfs_mdt_clear(nilfs->ns_sufile);
787 nilfs_mdt_clear(nilfs->ns_dat);
788 return err;
789}
790
791/**
792 * nilfs_search_super_root - search the latest valid super root
793 * @nilfs: the_nilfs
794 * @sbi: nilfs_sb_info
795 * @ri: pointer to a nilfs_recovery_info struct to store search results.
796 *
797 * nilfs_search_super_root() looks for the latest super-root from a partial
798 * segment pointed by the superblock. It sets up struct the_nilfs through
799 * this search. It fills nilfs_recovery_info (ri) required for recovery.
800 *
801 * Return Value: On success, 0 is returned. On error, one of the following
802 * negative error code is returned.
803 *
804 * %-EINVAL - No valid segment found
805 *
806 * %-EIO - I/O error
807 */
808int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi,
809 struct nilfs_recovery_info *ri)
810{
811 struct nilfs_segsum_info ssi;
812 sector_t pseg_start, pseg_end, sr_pseg_start = 0;
813 sector_t seg_start, seg_end; /* range of full segment (block number) */
814 u64 seg_seq;
815 __u64 segnum, nextnum = 0;
816 __u64 cno;
817 struct nilfs_segment_entry *ent;
818 LIST_HEAD(segments);
819 int empty_seg = 0, scan_newer = 0;
820 int ret;
821
822 pseg_start = nilfs->ns_last_pseg;
823 seg_seq = nilfs->ns_last_seq;
824 cno = nilfs->ns_last_cno;
825 segnum = nilfs_get_segnum_of_block(nilfs, pseg_start);
826
827 /* Calculate range of segment */
828 nilfs_get_segment_range(nilfs, segnum, &seg_start, &seg_end);
829
830 for (;;) {
831 /* Load segment summary */
832 ret = load_segment_summary(sbi, pseg_start, seg_seq, &ssi, 1);
833 if (ret) {
834 if (ret == NILFS_SEG_FAIL_IO)
835 goto failed;
836 goto strayed;
837 }
838 pseg_end = pseg_start + ssi.nblocks - 1;
839 if (unlikely(pseg_end > seg_end)) {
840 ret = NILFS_SEG_FAIL_CONSISTENCY;
841 goto strayed;
842 }
843
844 /* A valid partial segment */
845 ri->ri_pseg_start = pseg_start;
846 ri->ri_seq = seg_seq;
847 ri->ri_segnum = segnum;
848 nextnum = nilfs_get_segnum_of_block(nilfs, ssi.next);
849 ri->ri_nextnum = nextnum;
850 empty_seg = 0;
851
852 if (!NILFS_SEG_HAS_SR(&ssi)) {
853 if (!scan_newer) {
854 /* This will never happen because a superblock
855 (last_segment) always points to a pseg
856 having a super root. */
857 ret = NILFS_SEG_FAIL_CONSISTENCY;
858 goto failed;
859 }
860 if (!ri->ri_lsegs_start && NILFS_SEG_LOGBGN(&ssi)) {
861 ri->ri_lsegs_start = pseg_start;
862 ri->ri_lsegs_start_seq = seg_seq;
863 }
864 if (NILFS_SEG_LOGEND(&ssi))
865 ri->ri_lsegs_end = pseg_start;
866 goto try_next_pseg;
867 }
868
869 /* A valid super root was found. */
870 ri->ri_cno = cno++;
871 ri->ri_super_root = pseg_end;
872 ri->ri_lsegs_start = ri->ri_lsegs_end = 0;
873
874 nilfs_dispose_segment_list(&segments);
875 nilfs->ns_pseg_offset = (sr_pseg_start = pseg_start)
876 + ssi.nblocks - seg_start;
877 nilfs->ns_seg_seq = seg_seq;
878 nilfs->ns_segnum = segnum;
879 nilfs->ns_cno = cno; /* nilfs->ns_cno = ri->ri_cno + 1 */
880 nilfs->ns_ctime = ssi.ctime;
881 nilfs->ns_nextnum = nextnum;
882
883 if (scan_newer)
884 ri->ri_need_recovery = NILFS_RECOVERY_SR_UPDATED;
885 else if (nilfs->ns_mount_state & NILFS_VALID_FS)
886 goto super_root_found;
887
888 scan_newer = 1;
889
890 /* reset region for roll-forward */
891 pseg_start += ssi.nblocks;
892 if (pseg_start < seg_end)
893 continue;
894 goto feed_segment;
895
896 try_next_pseg:
897 /* Standing on a course, or met an inconsistent state */
898 pseg_start += ssi.nblocks;
899 if (pseg_start < seg_end)
900 continue;
901 goto feed_segment;
902
903 strayed:
904 /* Off the trail */
905 if (!scan_newer)
906 /*
907 * This can happen if a checkpoint was written without
908 * barriers, or as a result of an I/O failure.
909 */
910 goto failed;
911
912 feed_segment:
913 /* Looking to the next full segment */
914 if (empty_seg++)
915 goto super_root_found; /* found a valid super root */
916
917 ent = nilfs_alloc_segment_entry(segnum);
918 if (unlikely(!ent)) {
919 ret = -ENOMEM;
920 goto failed;
921 }
922 list_add_tail(&ent->list, &segments);
923
924 seg_seq++;
925 segnum = nextnum;
926 nilfs_get_segment_range(nilfs, segnum, &seg_start, &seg_end);
927 pseg_start = seg_start;
928 }
929
930 super_root_found:
931 /* Updating pointers relating to the latest checkpoint */
932 list_splice(&segments, ri->ri_used_segments.prev);
933 nilfs->ns_last_pseg = sr_pseg_start;
934 nilfs->ns_last_seq = nilfs->ns_seg_seq;
935 nilfs->ns_last_cno = ri->ri_cno;
936 return 0;
937
938 failed:
939 nilfs_dispose_segment_list(&segments);
940 return (ret < 0) ? ret : nilfs_warn_segment_error(ret);
941}