Diffstat (limited to 'fs/gfs2/lops.c')

 fs/gfs2/lops.c | 280 ++++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 257 insertions(+), 23 deletions(-)
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index a065f7667238..dd41863810d7 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -428,49 +428,188 @@ static void rg_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
 	gfs2_assert_warn(sdp, !sdp->sd_log_num_rg);
 }
 
+/**
+ * databuf_lo_add - Add a databuf to the transaction.
+ *
+ * This is used in two distinct cases:
+ * i) In ordered write mode
+ *    We put the data buffer on a list so that we can ensure that it is
+ *    synced to disk at the right time
+ * ii) In journaled data mode
+ *    We need to journal the data block in the same way as metadata in
+ *    the functions above. The difference is that here we have a tag
+ *    which is two __be64's being the block number (as per metadata)
+ *    and a flag which says whether the data block needs escaping or
+ *    not. This means we need a new log entry for each 251 or so data
+ *    blocks, which isn't an enormous overhead but twice as much as
+ *    for normal metadata blocks.
+ */
 static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
 {
-	get_transaction->tr_touched = 1;
+	struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le);
+	struct gfs2_trans *tr = get_transaction;
+	struct address_space *mapping = bd->bd_bh->b_page->mapping;
+	struct gfs2_inode *ip = get_v2ip(mapping->host);
 
+	tr->tr_touched = 1;
+	if (!list_empty(&bd->bd_list_tr) &&
+	    (ip->i_di.di_flags & GFS2_DIF_JDATA)) {
+		tr->tr_num_buf++;
+		gfs2_trans_add_gl(bd->bd_gl);
+		list_add(&bd->bd_list_tr, &tr->tr_list_buf);
+		gfs2_pin(sdp, bd->bd_bh);
+	} else {
+		clear_buffer_pinned(bd->bd_bh);
+	}
 	gfs2_log_lock(sdp);
+	if (ip->i_di.di_flags & GFS2_DIF_JDATA)
+		sdp->sd_log_num_jdata++;
 	sdp->sd_log_num_databuf++;
 	list_add(&le->le_list, &sdp->sd_log_le_databuf);
 	gfs2_log_unlock(sdp);
 }
 
+static int gfs2_check_magic(struct buffer_head *bh)
+{
+	struct page *page = bh->b_page;
+	void *kaddr;
+	__be32 *ptr;
+	int rv = 0;
+
+	kaddr = kmap_atomic(page, KM_USER0);
+	ptr = kaddr + bh_offset(bh);
+	if (*ptr == cpu_to_be32(GFS2_MAGIC))
+		rv = 1;
+	kunmap_atomic(page, KM_USER0);
+
+	return rv;
+}
+
+/**
+ * databuf_lo_before_commit - Scan the data buffers, writing as we go
+ *
+ * Here we scan through the lists of buffers and make the assumption
+ * that any buffer that has been pinned is being journaled, and that
+ * any unpinned buffer is an ordered write data buffer and therefore
+ * will be written back rather than journaled.
+ */
 static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
 {
-	struct list_head *head = &sdp->sd_log_le_databuf;
 	LIST_HEAD(started);
-	struct gfs2_bufdata *bd;
-	struct buffer_head *bh;
+	struct gfs2_bufdata *bd1 = NULL, *bd2, *bdt;
+	struct buffer_head *bh = NULL;
+	unsigned int offset = sizeof(struct gfs2_log_descriptor);
+	struct gfs2_log_descriptor *ld;
+	unsigned int limit;
+	unsigned int total_dbuf = sdp->sd_log_num_databuf;
+	unsigned int total_jdata = sdp->sd_log_num_jdata;
+	unsigned int num, n;
+	__be64 *ptr;
 
-	while (!list_empty(head)) {
-		bd = list_entry(head->prev, struct gfs2_bufdata, bd_le.le_list);
-		list_move(&bd->bd_le.le_list, &started);
+	offset += (2*sizeof(__be64) - 1);
+	offset &= ~(2*sizeof(__be64) - 1);
+	limit = (sdp->sd_sb.sb_bsize - offset)/sizeof(__be64);
 
-		gfs2_log_lock(sdp);
-		bh = bd->bd_bh;
+	/* printk(KERN_INFO "totals: jdata=%u dbuf=%u\n", total_jdata, total_dbuf); */
+	/*
+	 * Start writing ordered buffers, write journaled buffers
+	 * into the log along with a header
+	 */
+	bd2 = bd1 = list_prepare_entry(bd1, &sdp->sd_log_le_databuf, bd_le.le_list);
+	while (total_dbuf) {
+		num = total_jdata;
+		if (num > limit)
+			num = limit;
+		n = 0;
+		list_for_each_entry_safe_continue(bd1, bdt, &sdp->sd_log_le_databuf, bd_le.le_list) {
+			gfs2_log_lock(sdp);
+			/* An ordered write buffer */
+			if (bd1->bd_bh && !buffer_pinned(bd1->bd_bh)) {
+				list_move(&bd1->bd_le.le_list, &started);
+				if (bd1 == bd2) {
+					bd2 = NULL;
+					bd2 = list_prepare_entry(bd2, &sdp->sd_log_le_databuf, bd_le.le_list);
+				}
+				total_dbuf--;
+				if (bd1->bd_bh) {
+					get_bh(bd1->bd_bh);
+					gfs2_log_unlock(sdp);
+					if (buffer_dirty(bd1->bd_bh)) {
+						wait_on_buffer(bd1->bd_bh);
+						ll_rw_block(WRITE, 1, &bd1->bd_bh);
+					}
+					brelse(bd1->bd_bh);
+					continue;
+				}
+				gfs2_log_unlock(sdp);
+				continue;
+			} else if (bd1->bd_bh) { /* A journaled buffer */
+				int magic;
+				gfs2_log_unlock(sdp);
+				/* printk(KERN_INFO "journaled buffer\n"); */
+				if (!bh) {
+					bh = gfs2_log_get_buf(sdp);
+					ld = (struct gfs2_log_descriptor *)bh->b_data;
+					ptr = (__be64 *)(bh->b_data + offset);
+					ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
+					ld->ld_header.mh_type = cpu_to_be16(GFS2_METATYPE_LD);
+					ld->ld_header.mh_format = cpu_to_be16(GFS2_FORMAT_LD);
+					ld->ld_type = cpu_to_be32(GFS2_LOG_DESC_JDATA);
+					ld->ld_length = cpu_to_be32(num + 1);
+					ld->ld_data1 = cpu_to_be32(num);
+					ld->ld_data2 = cpu_to_be32(0);
+					memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved));
+				}
+				magic = gfs2_check_magic(bd1->bd_bh);
+				*ptr++ = cpu_to_be64(bd1->bd_bh->b_blocknr);
+				*ptr++ = cpu_to_be64((__u64)magic);
+				clear_buffer_escaped(bd1->bd_bh);
+				if (unlikely(magic != 0))
+					set_buffer_escaped(bd1->bd_bh);
+				if (n++ > num)
+					break;
+			}
+		}
 		if (bh) {
-			get_bh(bh);
-			gfs2_log_unlock(sdp);
-			if (buffer_dirty(bh)) {
-				wait_on_buffer(bh);
-				ll_rw_block(WRITE, 1, &bh);
+			set_buffer_dirty(bh);
+			ll_rw_block(WRITE, 1, &bh);
+			bh = NULL;
+		}
+		n = 0;
+		/* printk(KERN_INFO "totals2: jdata=%u dbuf=%u\n", total_jdata, total_dbuf); */
+		list_for_each_entry_continue(bd2, &sdp->sd_log_le_databuf, bd_le.le_list) {
+			if (!bd2->bd_bh)
+				continue;
+			/* copy buffer if it needs escaping */
+			if (unlikely(buffer_escaped(bd2->bd_bh))) {
+				void *kaddr;
+				struct page *page = bd2->bd_bh->b_page;
+				bh = gfs2_log_get_buf(sdp);
+				kaddr = kmap_atomic(page, KM_USER0);
+				memcpy(bh->b_data, kaddr + bh_offset(bd2->bd_bh), sdp->sd_sb.sb_bsize);
+				kunmap_atomic(page, KM_USER0);
+				*(__be32 *)bh->b_data = 0;
+			} else {
+				bh = gfs2_log_fake_buf(sdp, bd2->bd_bh);
 			}
-			brelse(bh);
-		} else
-			gfs2_log_unlock(sdp);
+			set_buffer_dirty(bh);
+			ll_rw_block(WRITE, 1, &bh);
+			if (++n >= num)
+				break;
+		}
+		bh = NULL;
+		total_dbuf -= num;
+		total_jdata -= num;
 	}
-
+	/* printk(KERN_INFO "wait on ordered data buffers\n"); */
+	/* Wait on all ordered buffers */
 	while (!list_empty(&started)) {
-		bd = list_entry(started.next, struct gfs2_bufdata,
-				bd_le.le_list);
-		list_del(&bd->bd_le.le_list);
+		bd1 = list_entry(started.next, struct gfs2_bufdata, bd_le.le_list);
+		list_del(&bd1->bd_le.le_list);
 		sdp->sd_log_num_databuf--;
 
 		gfs2_log_lock(sdp);
-		bh = bd->bd_bh;
+		bh = bd1->bd_bh;
 		if (bh) {
 			set_v2bd(bh, NULL);
 			gfs2_log_unlock(sdp);
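
The limit computed at the top of databuf_lo_before_commit() above is the number of __be64 tag slots left in a log block once the descriptor header has been rounded up to tag alignment; each journaled data block consumes two slots (block number plus escape flag), which is where the "251 or so data blocks" per log entry in the databuf_lo_add() comment comes from. A minimal standalone sketch of that arithmetic, with an assumed 4096-byte block size and an assumed 48-byte descriptor header standing in for sd_sb.sb_bsize and sizeof(struct gfs2_log_descriptor):

#include <stdio.h>

int main(void)
{
	unsigned long long bsize = 4096;	/* assumed sb_bsize */
	unsigned long long ldsize = 48;		/* assumed descriptor size */
	unsigned long long offset = ldsize;
	unsigned long long limit;

	/* round the header up to a 2*sizeof(__be64) boundary, as the patch does */
	offset += 2 * sizeof(unsigned long long) - 1;
	offset &= ~(2 * sizeof(unsigned long long) - 1ULL);

	/* __be64 slots left in the block; two slots tag one data block */
	limit = (bsize - offset) / sizeof(unsigned long long);
	printf("%llu slots -> %llu data blocks per descriptor\n",
	       limit, limit / 2);
	return 0;
}

With those assumed sizes this prints "506 slots -> 253 data blocks per descriptor", in the same neighbourhood as the comment's estimate.
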
@@ -479,12 +618,103 @@ static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
 		} else
 			gfs2_log_unlock(sdp);
 
-		kfree(bd);
+		kfree(bd1);
 	}
 
+	/* printk(KERN_INFO "sd_log_num_databuf %u sd_log_num_jdata %u\n", sdp->sd_log_num_databuf, sdp->sd_log_num_jdata); */
+	/* We've removed all the ordered write bufs here, so only jdata left */
+	gfs2_assert_warn(sdp, sdp->sd_log_num_databuf == sdp->sd_log_num_jdata);
+}
+
+static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
+				    struct gfs2_log_descriptor *ld,
+				    __be64 *ptr, int pass)
+{
+	struct gfs2_sbd *sdp = jd->jd_inode->i_sbd;
+	struct gfs2_glock *gl = jd->jd_inode->i_gl;
+	unsigned int blks = be32_to_cpu(ld->ld_data1);
+	struct buffer_head *bh_log, *bh_ip;
+	uint64_t blkno;
+	uint64_t esc;
+	int error = 0;
+
+	if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_JDATA)
+		return 0;
+
+	gfs2_replay_incr_blk(sdp, &start);
+	for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) {
+		blkno = be64_to_cpu(*ptr++);
+		esc = be64_to_cpu(*ptr++);
+
+		sdp->sd_found_blocks++;
+
+		if (gfs2_revoke_check(sdp, blkno, start))
+			continue;
+
+		error = gfs2_replay_read_block(jd, start, &bh_log);
+		if (error)
+			return error;
+
+		bh_ip = gfs2_meta_new(gl, blkno);
+		memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size);
+
+		/* Unescape */
+		if (esc) {
+			__be32 *eptr = (__be32 *)bh_ip->b_data;
+			*eptr = cpu_to_be32(GFS2_MAGIC);
+		}
+		mark_buffer_dirty(bh_ip);
+
+		brelse(bh_log);
+		brelse(bh_ip);
+		if (error)
+			break;
+
+		sdp->sd_replayed_blocks++;
+	}
+
+	return error;
+}
+
+/* FIXME: sort out accounting for log blocks etc. */
+
+static void databuf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
+{
+	struct gfs2_sbd *sdp = jd->jd_inode->i_sbd;
+
+	if (error) {
+		gfs2_meta_sync(jd->jd_inode->i_gl, DIO_START | DIO_WAIT);
+		return;
+	}
+	if (pass != 1)
+		return;
+
+	/* data sync? */
+	gfs2_meta_sync(jd->jd_inode->i_gl, DIO_START | DIO_WAIT);
+
+	fs_info(sdp, "jid=%u: Replayed %u of %u data blocks\n",
+		jd->jd_jid, sdp->sd_replayed_blocks, sdp->sd_found_blocks);
+}
+
+static void databuf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
+{
+	struct list_head *head = &sdp->sd_log_le_databuf;
+	struct gfs2_bufdata *bd;
+
+	while (!list_empty(head)) {
+		bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list);
+		list_del_init(&bd->bd_le.le_list);
+		sdp->sd_log_num_databuf--;
+		sdp->sd_log_num_jdata--;
+		gfs2_unpin(sdp, bd->bd_bh, ai);
+		brelse(bd->bd_bh);
+		kfree(bd);
+	}
 	gfs2_assert_warn(sdp, !sdp->sd_log_num_databuf);
+	gfs2_assert_warn(sdp, !sdp->sd_log_num_jdata);
 }
 
+
 struct gfs2_log_operations gfs2_glock_lops = {
 	.lo_add = glock_lo_add,
 	.lo_after_commit = glock_lo_after_commit,
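
Taken together, gfs2_check_magic(), the escaped-buffer copy in databuf_lo_before_commit() and the unescape step in databuf_lo_scan_elements() maintain one invariant: a journaled data block whose first word happens to equal GFS2_MAGIC would be indistinguishable from a metadata header during replay, so the copy written to the log has that word zeroed, the tag's second __be64 records the escape, and replay puts the magic back. A simplified userspace round trip of the same idea (byte order ignored, helper names hypothetical):

#include <assert.h>
#include <stdint.h>
#include <string.h>

#define GFS2_MAGIC 0x01161970

/* Writer side: copy a data block into the log, escaping if needed.
 * The return value is the flag stored in the block's log tag. */
static uint64_t log_escape(uint32_t *log_copy, const uint32_t *data,
			   size_t nwords)
{
	memcpy(log_copy, data, nwords * sizeof(*data));
	if (log_copy[0] == GFS2_MAGIC) {
		log_copy[0] = 0;	/* must not look like metadata */
		return 1;
	}
	return 0;
}

/* Replay side: undo the escape using the flag read from the tag. */
static void replay_unescape(uint32_t *block, uint64_t esc)
{
	if (esc)
		block[0] = GFS2_MAGIC;
}

int main(void)
{
	uint32_t data[4] = { GFS2_MAGIC, 1, 2, 3 };	/* worst case */
	uint32_t log_copy[4];
	uint64_t esc = log_escape(log_copy, data, 4);

	replay_unescape(log_copy, esc);
	assert(memcmp(log_copy, data, sizeof(data)) == 0);
	return 0;
}
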
@@ -519,7 +749,11 @@ struct gfs2_log_operations gfs2_rg_lops = {
 
 struct gfs2_log_operations gfs2_databuf_lops = {
 	.lo_add = databuf_lo_add,
+	.lo_incore_commit = buf_lo_incore_commit,
 	.lo_before_commit = databuf_lo_before_commit,
+	.lo_after_commit = databuf_lo_after_commit,
+	.lo_scan_elements = databuf_lo_scan_elements,
+	.lo_after_scan = databuf_lo_after_scan,
 	.lo_name = "databuf"
 };
 
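
For context on why the new table entries matter: the journaling core walks the registered gfs2_log_operations tables and calls whichever hooks are non-NULL at each phase, so wiring up .lo_after_commit, .lo_scan_elements and .lo_after_scan is what lets jdata buffers be unpinned after a commit and replayed after a crash. A hypothetical sketch of that dispatch pattern, with illustrative types and names rather than the kernel's:

#include <stdio.h>

struct log_ops {
	void (*lo_before_commit)(void *sdp);
	void (*lo_after_commit)(void *sdp, void *ai);
	const char *lo_name;
};

/* The core loops over a NULL-terminated array of ops tables and
 * skips hooks left unset, so each log element type opts in to only
 * the phases it needs. */
static void lops_before_commit(void *sdp, struct log_ops **lops)
{
	for (; *lops; lops++)
		if ((*lops)->lo_before_commit)
			(*lops)->lo_before_commit(sdp);
}

static void databuf_before_commit(void *sdp)
{
	(void)sdp;
	printf("databuf: before_commit\n");
}

static struct log_ops databuf_ops = {
	.lo_before_commit = databuf_before_commit,
	.lo_name = "databuf",
};

int main(void)
{
	struct log_ops *tables[] = { &databuf_ops, NULL };

	lops_before_commit(NULL, tables);
	return 0;
}
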