diff options
Diffstat (limited to 'fs/gfs2/lops.c')
-rw-r--r-- | fs/gfs2/lops.c | 280 |
1 files changed, 257 insertions, 23 deletions
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index a065f7667238..dd41863810d7 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c | |||
@@ -428,49 +428,188 @@ static void rg_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai) | |||
428 | gfs2_assert_warn(sdp, !sdp->sd_log_num_rg); | 428 | gfs2_assert_warn(sdp, !sdp->sd_log_num_rg); |
429 | } | 429 | } |
430 | 430 | ||
431 | /** | ||
432 | * databuf_lo_add - Add a databuf to the transaction. | ||
433 | * | ||
434 | * This is used in two distinct cases: | ||
435 | * i) In ordered write mode | ||
436 | * We put the data buffer on a list so that we can ensure that it is | ||
437 | * synced to disk at the right time | ||
438 | * ii) In journaled data mode | ||
439 | * We need to journal the data block in the same way as metadata in | ||
440 | * the functions above. The difference is that here we have a tag | ||
441 | * which is two __be64's being the block number (as per meta data) | ||
442 | * and a flag which says whether the data block needs escaping or | ||
443 | * not. This means we need a new log entry for each 251 or so data | ||
444 | * blocks, which isn't an enormous overhead but twice as much as | ||
445 | * for normal metadata blocks. | ||
446 | */ | ||
431 | static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) | 447 | static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) |
432 | { | 448 | { |
433 | get_transaction->tr_touched = 1; | 449 | struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le); |
450 | struct gfs2_trans *tr = get_transaction; | ||
451 | struct address_space *mapping = bd->bd_bh->b_page->mapping; | ||
452 | struct gfs2_inode *ip = get_v2ip(mapping->host); | ||
434 | 453 | ||
454 | tr->tr_touched = 1; | ||
455 | if (!list_empty(&bd->bd_list_tr) && | ||
456 | (ip->i_di.di_flags & GFS2_DIF_JDATA)) { | ||
457 | tr->tr_num_buf++; | ||
458 | gfs2_trans_add_gl(bd->bd_gl); | ||
459 | list_add(&bd->bd_list_tr, &tr->tr_list_buf); | ||
460 | gfs2_pin(sdp, bd->bd_bh); | ||
461 | } else { | ||
462 | clear_buffer_pinned(bd->bd_bh); | ||
463 | } | ||
435 | gfs2_log_lock(sdp); | 464 | gfs2_log_lock(sdp); |
465 | if (ip->i_di.di_flags & GFS2_DIF_JDATA) | ||
466 | sdp->sd_log_num_jdata++; | ||
436 | sdp->sd_log_num_databuf++; | 467 | sdp->sd_log_num_databuf++; |
437 | list_add(&le->le_list, &sdp->sd_log_le_databuf); | 468 | list_add(&le->le_list, &sdp->sd_log_le_databuf); |
438 | gfs2_log_unlock(sdp); | 469 | gfs2_log_unlock(sdp); |
439 | } | 470 | } |
440 | 471 | ||
472 | static int gfs2_check_magic(struct buffer_head *bh) | ||
473 | { | ||
474 | struct page *page = bh->b_page; | ||
475 | void *kaddr; | ||
476 | __be32 *ptr; | ||
477 | int rv = 0; | ||
478 | |||
479 | kaddr = kmap_atomic(page, KM_USER0); | ||
480 | ptr = kaddr + bh_offset(bh); | ||
481 | if (*ptr == cpu_to_be32(GFS2_MAGIC)) | ||
482 | rv = 1; | ||
483 | kunmap_atomic(page, KM_USER0); | ||
484 | |||
485 | return rv; | ||
486 | } | ||
487 | |||
488 | /** | ||
489 | * databuf_lo_before_commit - Scan the data buffers, writing as we go | ||
490 | * | ||
491 | * Here we scan through the lists of buffers and make the assumption | ||
492 | * that any buffer that's been pinned is being journaled, and that | ||
493 | * any unpinned buffer is an ordered write data buffer and therefore | ||
494 | * will be written back rather than journaled. | ||
495 | */ | ||
441 | static void databuf_lo_before_commit(struct gfs2_sbd *sdp) | 496 | static void databuf_lo_before_commit(struct gfs2_sbd *sdp) |
442 | { | 497 | { |
443 | struct list_head *head = &sdp->sd_log_le_databuf; | ||
444 | LIST_HEAD(started); | 498 | LIST_HEAD(started); |
445 | struct gfs2_bufdata *bd; | 499 | struct gfs2_bufdata *bd1 = NULL, *bd2, *bdt; |
446 | struct buffer_head *bh; | 500 | struct buffer_head *bh = NULL; |
501 | unsigned int offset = sizeof(struct gfs2_log_descriptor); | ||
502 | struct gfs2_log_descriptor *ld; | ||
503 | unsigned int limit; | ||
504 | unsigned int total_dbuf = sdp->sd_log_num_databuf; | ||
505 | unsigned int total_jdata = sdp->sd_log_num_jdata; | ||
506 | unsigned int num, n; | ||
507 | __be64 *ptr; | ||
447 | 508 | ||
448 | while (!list_empty(head)) { | 509 | offset += (2*sizeof(__be64) - 1); |
449 | bd = list_entry(head->prev, struct gfs2_bufdata, bd_le.le_list); | 510 | offset &= ~(2*sizeof(__be64) - 1); |
450 | list_move(&bd->bd_le.le_list, &started); | 511 | limit = (sdp->sd_sb.sb_bsize - offset)/sizeof(__be64); |
451 | 512 | ||
452 | gfs2_log_lock(sdp); | 513 | /* printk(KERN_INFO "totals: jdata=%u dbuf=%u\n", total_jdata, total_dbuf); */ |
453 | bh = bd->bd_bh; | 514 | /* |
515 | * Start writing ordered buffers, write journaled buffers | ||
516 | * into the log along with a header | ||
517 | */ | ||
518 | bd2 = bd1 = list_prepare_entry(bd1, &sdp->sd_log_le_databuf, bd_le.le_list); | ||
519 | while(total_dbuf) { | ||
520 | num = total_jdata; | ||
521 | if (num > limit) | ||
522 | num = limit; | ||
523 | n = 0; | ||
524 | list_for_each_entry_safe_continue(bd1, bdt, &sdp->sd_log_le_databuf, bd_le.le_list) { | ||
525 | gfs2_log_lock(sdp); | ||
526 | /* An ordered write buffer */ | ||
527 | if (bd1->bd_bh && !buffer_pinned(bd1->bd_bh)) { | ||
528 | list_move(&bd1->bd_le.le_list, &started); | ||
529 | if (bd1 == bd2) { | ||
530 | bd2 = NULL; | ||
531 | bd2 = list_prepare_entry(bd2, &sdp->sd_log_le_databuf, bd_le.le_list); | ||
532 | } | ||
533 | total_dbuf--; | ||
534 | if (bd1->bd_bh) { | ||
535 | get_bh(bd1->bd_bh); | ||
536 | gfs2_log_unlock(sdp); | ||
537 | if (buffer_dirty(bd1->bd_bh)) { | ||
538 | wait_on_buffer(bd1->bd_bh); | ||
539 | ll_rw_block(WRITE, 1, &bd1->bd_bh); | ||
540 | } | ||
541 | brelse(bd1->bd_bh); | ||
542 | continue; | ||
543 | } | ||
544 | gfs2_log_unlock(sdp); | ||
545 | continue; | ||
546 | } else if (bd1->bd_bh) { /* A journaled buffer */ | ||
547 | int magic; | ||
548 | gfs2_log_unlock(sdp); | ||
549 | /* printk(KERN_INFO "journaled buffer\n"); */ | ||
550 | if (!bh) { | ||
551 | bh = gfs2_log_get_buf(sdp); | ||
552 | ld = (struct gfs2_log_descriptor *)bh->b_data; | ||
553 | ptr = (__be64 *)(bh->b_data + offset); | ||
554 | ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC); | ||
555 | ld->ld_header.mh_type = cpu_to_be16(GFS2_METATYPE_LD); | ||
556 | ld->ld_header.mh_format = cpu_to_be16(GFS2_FORMAT_LD); | ||
557 | ld->ld_type = cpu_to_be32(GFS2_LOG_DESC_JDATA); | ||
558 | ld->ld_length = cpu_to_be32(num + 1); | ||
559 | ld->ld_data1 = cpu_to_be32(num); | ||
560 | ld->ld_data2 = cpu_to_be32(0); | ||
561 | memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved)); | ||
562 | } | ||
563 | magic = gfs2_check_magic(bd1->bd_bh); | ||
564 | *ptr++ = cpu_to_be64(bd1->bd_bh->b_blocknr); | ||
565 | *ptr++ = cpu_to_be64((__u64)magic); | ||
566 | clear_buffer_escaped(bd1->bd_bh); | ||
567 | if (unlikely(magic != 0)) | ||
568 | set_buffer_escaped(bd1->bd_bh); | ||
569 | if (n++ > num) | ||
570 | break; | ||
571 | } | ||
572 | } | ||
454 | if (bh) { | 573 | if (bh) { |
455 | get_bh(bh); | 574 | set_buffer_dirty(bh); |
456 | gfs2_log_unlock(sdp); | 575 | ll_rw_block(WRITE, 1, &bh); |
457 | if (buffer_dirty(bh)) { | 576 | bh = NULL; |
458 | wait_on_buffer(bh); | 577 | } |
459 | ll_rw_block(WRITE, 1, &bh); | 578 | n = 0; |
579 | /* printk(KERN_INFO "totals2: jdata=%u dbuf=%u\n", total_jdata, total_dbuf); */ | ||
580 | list_for_each_entry_continue(bd2, &sdp->sd_log_le_databuf, bd_le.le_list) { | ||
581 | if (!bd2->bd_bh) | ||
582 | continue; | ||
583 | /* copy buffer if it needs escaping */ | ||
584 | if (unlikely(buffer_escaped(bd2->bd_bh))) { | ||
585 | void *kaddr; | ||
586 | struct page *page = bd2->bd_bh->b_page; | ||
587 | bh = gfs2_log_get_buf(sdp); | ||
588 | kaddr = kmap_atomic(page, KM_USER0); | ||
589 | memcpy(bh->b_data, kaddr + bh_offset(bd2->bd_bh), sdp->sd_sb.sb_bsize); | ||
590 | kunmap_atomic(page, KM_USER0); | ||
591 | *(__be32 *)bh->b_data = 0; | ||
592 | } else { | ||
593 | bh = gfs2_log_fake_buf(sdp, bd2->bd_bh); | ||
460 | } | 594 | } |
461 | brelse(bh); | 595 | set_buffer_dirty(bh); |
462 | } else | 596 | ll_rw_block(WRITE, 1, &bh); |
463 | gfs2_log_unlock(sdp); | 597 | if (++n >= num) |
598 | break; | ||
599 | } | ||
600 | bh = NULL; | ||
601 | total_dbuf -= num; | ||
602 | total_jdata -= num; | ||
464 | } | 603 | } |
465 | 604 | /* printk(KERN_INFO "wait on ordered data buffers\n"); */ | |
605 | /* Wait on all ordered buffers */ | ||
466 | while (!list_empty(&started)) { | 606 | while (!list_empty(&started)) { |
467 | bd = list_entry(started.next, struct gfs2_bufdata, | 607 | bd1 = list_entry(started.next, struct gfs2_bufdata, bd_le.le_list); |
468 | bd_le.le_list); | 608 | list_del(&bd1->bd_le.le_list); |
469 | list_del(&bd->bd_le.le_list); | ||
470 | sdp->sd_log_num_databuf--; | 609 | sdp->sd_log_num_databuf--; |
471 | 610 | ||
472 | gfs2_log_lock(sdp); | 611 | gfs2_log_lock(sdp); |
473 | bh = bd->bd_bh; | 612 | bh = bd1->bd_bh; |
474 | if (bh) { | 613 | if (bh) { |
475 | set_v2bd(bh, NULL); | 614 | set_v2bd(bh, NULL); |
476 | gfs2_log_unlock(sdp); | 615 | gfs2_log_unlock(sdp); |
@@ -479,12 +618,103 @@ static void databuf_lo_before_commit(struct gfs2_sbd *sdp) | |||
479 | } else | 618 | } else |
480 | gfs2_log_unlock(sdp); | 619 | gfs2_log_unlock(sdp); |
481 | 620 | ||
482 | kfree(bd); | 621 | kfree(bd1); |
483 | } | 622 | } |
484 | 623 | ||
624 | /* printk(KERN_INFO "sd_log_num_databuf %u sd_log_num_jdata %u\n", sdp->sd_log_num_databuf, sdp->sd_log_num_jdata); */ | ||
625 | /* We've removed all the ordered write bufs here, so only jdata left */ | ||
626 | gfs2_assert_warn(sdp, sdp->sd_log_num_databuf == sdp->sd_log_num_jdata); | ||
627 | } | ||
628 | |||
629 | static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, | ||
630 | struct gfs2_log_descriptor *ld, | ||
631 | __be64 *ptr, int pass) | ||
632 | { | ||
633 | struct gfs2_sbd *sdp = jd->jd_inode->i_sbd; | ||
634 | struct gfs2_glock *gl = jd->jd_inode->i_gl; | ||
635 | unsigned int blks = be32_to_cpu(ld->ld_data1); | ||
636 | struct buffer_head *bh_log, *bh_ip; | ||
637 | uint64_t blkno; | ||
638 | uint64_t esc; | ||
639 | int error = 0; | ||
640 | |||
641 | if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_JDATA) | ||
642 | return 0; | ||
643 | |||
644 | gfs2_replay_incr_blk(sdp, &start); | ||
645 | for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) { | ||
646 | blkno = be64_to_cpu(*ptr++); | ||
647 | esc = be64_to_cpu(*ptr++); | ||
648 | |||
649 | sdp->sd_found_blocks++; | ||
650 | |||
651 | if (gfs2_revoke_check(sdp, blkno, start)) | ||
652 | continue; | ||
653 | |||
654 | error = gfs2_replay_read_block(jd, start, &bh_log); | ||
655 | if (error) | ||
656 | return error; | ||
657 | |||
658 | bh_ip = gfs2_meta_new(gl, blkno); | ||
659 | memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size); | ||
660 | |||
661 | /* Unescape */ | ||
662 | if (esc) { | ||
663 | __be32 *eptr = (__be32 *)bh_ip->b_data; | ||
664 | *eptr = cpu_to_be32(GFS2_MAGIC); | ||
665 | } | ||
666 | mark_buffer_dirty(bh_ip); | ||
667 | |||
668 | brelse(bh_log); | ||
669 | brelse(bh_ip); | ||
670 | if (error) | ||
671 | break; | ||
672 | |||
673 | sdp->sd_replayed_blocks++; | ||
674 | } | ||
675 | |||
676 | return error; | ||
677 | } | ||
678 | |||
679 | /* FIXME: sort out accounting for log blocks etc. */ | ||
680 | |||
681 | static void databuf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass) | ||
682 | { | ||
683 | struct gfs2_sbd *sdp = jd->jd_inode->i_sbd; | ||
684 | |||
685 | if (error) { | ||
686 | gfs2_meta_sync(jd->jd_inode->i_gl, DIO_START | DIO_WAIT); | ||
687 | return; | ||
688 | } | ||
689 | if (pass != 1) | ||
690 | return; | ||
691 | |||
692 | /* data sync? */ | ||
693 | gfs2_meta_sync(jd->jd_inode->i_gl, DIO_START | DIO_WAIT); | ||
694 | |||
695 | fs_info(sdp, "jid=%u: Replayed %u of %u data blocks\n", | ||
696 | jd->jd_jid, sdp->sd_replayed_blocks, sdp->sd_found_blocks); | ||
697 | } | ||
698 | |||
699 | static void databuf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai) | ||
700 | { | ||
701 | struct list_head *head = &sdp->sd_log_le_databuf; | ||
702 | struct gfs2_bufdata *bd; | ||
703 | |||
704 | while (!list_empty(head)) { | ||
705 | bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list); | ||
706 | list_del_init(&bd->bd_le.le_list); | ||
707 | sdp->sd_log_num_databuf--; | ||
708 | sdp->sd_log_num_jdata--; | ||
709 | gfs2_unpin(sdp, bd->bd_bh, ai); | ||
710 | brelse(bd->bd_bh); | ||
711 | kfree(bd); | ||
712 | } | ||
485 | gfs2_assert_warn(sdp, !sdp->sd_log_num_databuf); | 713 | gfs2_assert_warn(sdp, !sdp->sd_log_num_databuf); |
714 | gfs2_assert_warn(sdp, !sdp->sd_log_num_jdata); | ||
486 | } | 715 | } |
487 | 716 | ||
717 | |||
488 | struct gfs2_log_operations gfs2_glock_lops = { | 718 | struct gfs2_log_operations gfs2_glock_lops = { |
489 | .lo_add = glock_lo_add, | 719 | .lo_add = glock_lo_add, |
490 | .lo_after_commit = glock_lo_after_commit, | 720 | .lo_after_commit = glock_lo_after_commit, |
@@ -519,7 +749,11 @@ struct gfs2_log_operations gfs2_rg_lops = { | |||
519 | 749 | ||
520 | struct gfs2_log_operations gfs2_databuf_lops = { | 750 | struct gfs2_log_operations gfs2_databuf_lops = { |
521 | .lo_add = databuf_lo_add, | 751 | .lo_add = databuf_lo_add, |
752 | .lo_incore_commit = buf_lo_incore_commit, | ||
522 | .lo_before_commit = databuf_lo_before_commit, | 753 | .lo_before_commit = databuf_lo_before_commit, |
754 | .lo_after_commit = databuf_lo_after_commit, | ||
755 | .lo_scan_elements = databuf_lo_scan_elements, | ||
756 | .lo_after_scan = databuf_lo_after_scan, | ||
523 | .lo_name = "databuf" | 757 | .lo_name = "databuf" |
524 | }; | 758 | }; |
525 | 759 | ||