diff options
author | Steven Whitehouse <swhiteho@redhat.com> | 2007-09-17 05:59:52 -0400 |
---|---|---|
committer | Steven Whitehouse <swhiteho@redhat.com> | 2007-10-10 03:56:24 -0400 |
commit | 16615be18cadf53ee6f8a4f0bdd647f0753421b1 (patch) | |
tree | 670c75e931e6d606211f338ee5e8b1d603c96521 /fs/gfs2/lops.c | |
parent | 55c0c4ac0be144014651b19e77c9b77f367955de (diff) |
[GFS2] Clean up journaled data writing
This patch cleans up the code for writing journaled data into the log.
It also removes the need to allocate a small "tag" structure for each
block written into the log. Instead we just keep count of the outstanding
I/O so that we can be sure that it's all been written at the correct time.
Another result of this patch is that a number of ll_rw_block() calls
have become submit_bh() calls, closing some races at the same time.
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Diffstat (limited to 'fs/gfs2/lops.c')
-rw-r--r-- | fs/gfs2/lops.c | 242 |
1 files changed, 126 insertions, 116 deletions
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 342c10e12af2..6c27cea761c6 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c | |||
@@ -91,6 +91,39 @@ static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh, | |||
91 | unlock_buffer(bh); | 91 | unlock_buffer(bh); |
92 | } | 92 | } |
93 | 93 | ||
94 | |||
95 | static inline struct gfs2_log_descriptor *bh_log_desc(struct buffer_head *bh) | ||
96 | { | ||
97 | return (struct gfs2_log_descriptor *)bh->b_data; | ||
98 | } | ||
99 | |||
100 | static inline __be64 *bh_log_ptr(struct buffer_head *bh) | ||
101 | { | ||
102 | struct gfs2_log_descriptor *ld = bh_log_desc(bh); | ||
103 | return (__force __be64 *)(ld + 1); | ||
104 | } | ||
105 | |||
106 | static inline __be64 *bh_ptr_end(struct buffer_head *bh) | ||
107 | { | ||
108 | return (__force __be64 *)(bh->b_data + bh->b_size); | ||
109 | } | ||
110 | |||
111 | |||
112 | static struct buffer_head *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type) | ||
113 | { | ||
114 | struct buffer_head *bh = gfs2_log_get_buf(sdp); | ||
115 | struct gfs2_log_descriptor *ld = bh_log_desc(bh); | ||
116 | ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC); | ||
117 | ld->ld_header.mh_type = cpu_to_be32(GFS2_METATYPE_LD); | ||
118 | ld->ld_header.mh_format = cpu_to_be32(GFS2_FORMAT_LD); | ||
119 | ld->ld_type = cpu_to_be32(ld_type); | ||
120 | ld->ld_length = 0; | ||
121 | ld->ld_data1 = 0; | ||
122 | ld->ld_data2 = 0; | ||
123 | memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved)); | ||
124 | return bh; | ||
125 | } | ||
126 | |||
94 | static void __glock_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) | 127 | static void __glock_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) |
95 | { | 128 | { |
96 | struct gfs2_glock *gl; | 129 | struct gfs2_glock *gl; |
@@ -181,7 +214,6 @@ static void buf_lo_before_commit(struct gfs2_sbd *sdp) | |||
181 | struct gfs2_log_descriptor *ld; | 214 | struct gfs2_log_descriptor *ld; |
182 | struct gfs2_bufdata *bd1 = NULL, *bd2; | 215 | struct gfs2_bufdata *bd1 = NULL, *bd2; |
183 | unsigned int total; | 216 | unsigned int total; |
184 | unsigned int offset = BUF_OFFSET; | ||
185 | unsigned int limit; | 217 | unsigned int limit; |
186 | unsigned int num; | 218 | unsigned int num; |
187 | unsigned n; | 219 | unsigned n; |
@@ -198,18 +230,12 @@ static void buf_lo_before_commit(struct gfs2_sbd *sdp) | |||
198 | if (total > limit) | 230 | if (total > limit) |
199 | num = limit; | 231 | num = limit; |
200 | gfs2_log_unlock(sdp); | 232 | gfs2_log_unlock(sdp); |
201 | bh = gfs2_log_get_buf(sdp); | 233 | bh = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_METADATA); |
202 | gfs2_log_lock(sdp); | 234 | gfs2_log_lock(sdp); |
203 | ld = (struct gfs2_log_descriptor *)bh->b_data; | 235 | ld = bh_log_desc(bh); |
204 | ptr = (__be64 *)(bh->b_data + offset); | 236 | ptr = bh_log_ptr(bh); |
205 | ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC); | ||
206 | ld->ld_header.mh_type = cpu_to_be32(GFS2_METATYPE_LD); | ||
207 | ld->ld_header.mh_format = cpu_to_be32(GFS2_FORMAT_LD); | ||
208 | ld->ld_type = cpu_to_be32(GFS2_LOG_DESC_METADATA); | ||
209 | ld->ld_length = cpu_to_be32(num + 1); | 237 | ld->ld_length = cpu_to_be32(num + 1); |
210 | ld->ld_data1 = cpu_to_be32(num); | 238 | ld->ld_data1 = cpu_to_be32(num); |
211 | ld->ld_data2 = cpu_to_be32(0); | ||
212 | memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved)); | ||
213 | 239 | ||
214 | n = 0; | 240 | n = 0; |
215 | list_for_each_entry_continue(bd1, &sdp->sd_log_le_buf, | 241 | list_for_each_entry_continue(bd1, &sdp->sd_log_le_buf, |
@@ -220,17 +246,17 @@ static void buf_lo_before_commit(struct gfs2_sbd *sdp) | |||
220 | } | 246 | } |
221 | 247 | ||
222 | gfs2_log_unlock(sdp); | 248 | gfs2_log_unlock(sdp); |
223 | set_buffer_dirty(bh); | 249 | submit_bh(WRITE, bh); |
224 | ll_rw_block(WRITE, 1, &bh); | ||
225 | gfs2_log_lock(sdp); | 250 | gfs2_log_lock(sdp); |
226 | 251 | ||
227 | n = 0; | 252 | n = 0; |
228 | list_for_each_entry_continue(bd2, &sdp->sd_log_le_buf, | 253 | list_for_each_entry_continue(bd2, &sdp->sd_log_le_buf, |
229 | bd_le.le_list) { | 254 | bd_le.le_list) { |
255 | get_bh(bd2->bd_bh); | ||
230 | gfs2_log_unlock(sdp); | 256 | gfs2_log_unlock(sdp); |
257 | lock_buffer(bd2->bd_bh); | ||
231 | bh = gfs2_log_fake_buf(sdp, bd2->bd_bh); | 258 | bh = gfs2_log_fake_buf(sdp, bd2->bd_bh); |
232 | set_buffer_dirty(bh); | 259 | submit_bh(WRITE, bh); |
233 | ll_rw_block(WRITE, 1, &bh); | ||
234 | gfs2_log_lock(sdp); | 260 | gfs2_log_lock(sdp); |
235 | if (++n >= num) | 261 | if (++n >= num) |
236 | break; | 262 | break; |
@@ -359,17 +385,11 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp) | |||
359 | if (!sdp->sd_log_num_revoke) | 385 | if (!sdp->sd_log_num_revoke) |
360 | return; | 386 | return; |
361 | 387 | ||
362 | bh = gfs2_log_get_buf(sdp); | 388 | bh = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_REVOKE); |
363 | ld = (struct gfs2_log_descriptor *)bh->b_data; | 389 | ld = bh_log_desc(bh); |
364 | ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC); | ||
365 | ld->ld_header.mh_type = cpu_to_be32(GFS2_METATYPE_LD); | ||
366 | ld->ld_header.mh_format = cpu_to_be32(GFS2_FORMAT_LD); | ||
367 | ld->ld_type = cpu_to_be32(GFS2_LOG_DESC_REVOKE); | ||
368 | ld->ld_length = cpu_to_be32(gfs2_struct2blk(sdp, sdp->sd_log_num_revoke, | 390 | ld->ld_length = cpu_to_be32(gfs2_struct2blk(sdp, sdp->sd_log_num_revoke, |
369 | sizeof(u64))); | 391 | sizeof(u64))); |
370 | ld->ld_data1 = cpu_to_be32(sdp->sd_log_num_revoke); | 392 | ld->ld_data1 = cpu_to_be32(sdp->sd_log_num_revoke); |
371 | ld->ld_data2 = cpu_to_be32(0); | ||
372 | memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved)); | ||
373 | offset = sizeof(struct gfs2_log_descriptor); | 393 | offset = sizeof(struct gfs2_log_descriptor); |
374 | 394 | ||
375 | while (!list_empty(head)) { | 395 | while (!list_empty(head)) { |
@@ -378,8 +398,7 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp) | |||
378 | sdp->sd_log_num_revoke--; | 398 | sdp->sd_log_num_revoke--; |
379 | 399 | ||
380 | if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) { | 400 | if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) { |
381 | set_buffer_dirty(bh); | 401 | submit_bh(WRITE, bh); |
382 | ll_rw_block(WRITE, 1, &bh); | ||
383 | 402 | ||
384 | bh = gfs2_log_get_buf(sdp); | 403 | bh = gfs2_log_get_buf(sdp); |
385 | mh = (struct gfs2_meta_header *)bh->b_data; | 404 | mh = (struct gfs2_meta_header *)bh->b_data; |
@@ -396,8 +415,7 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp) | |||
396 | } | 415 | } |
397 | gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke); | 416 | gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke); |
398 | 417 | ||
399 | set_buffer_dirty(bh); | 418 | submit_bh(WRITE, bh); |
400 | ll_rw_block(WRITE, 1, &bh); | ||
401 | } | 419 | } |
402 | 420 | ||
403 | static void revoke_lo_before_scan(struct gfs2_jdesc *jd, | 421 | static void revoke_lo_before_scan(struct gfs2_jdesc *jd, |
@@ -562,118 +580,110 @@ out: | |||
562 | unlock_buffer(bd->bd_bh); | 580 | unlock_buffer(bd->bd_bh); |
563 | } | 581 | } |
564 | 582 | ||
565 | static int gfs2_check_magic(struct buffer_head *bh) | 583 | static void gfs2_check_magic(struct buffer_head *bh) |
566 | { | 584 | { |
567 | struct page *page = bh->b_page; | ||
568 | void *kaddr; | 585 | void *kaddr; |
569 | __be32 *ptr; | 586 | __be32 *ptr; |
570 | int rv = 0; | ||
571 | 587 | ||
572 | kaddr = kmap_atomic(page, KM_USER0); | 588 | clear_buffer_escaped(bh); |
589 | kaddr = kmap_atomic(bh->b_page, KM_USER0); | ||
573 | ptr = kaddr + bh_offset(bh); | 590 | ptr = kaddr + bh_offset(bh); |
574 | if (*ptr == cpu_to_be32(GFS2_MAGIC)) | 591 | if (*ptr == cpu_to_be32(GFS2_MAGIC)) |
575 | rv = 1; | 592 | set_buffer_escaped(bh); |
576 | kunmap_atomic(kaddr, KM_USER0); | 593 | kunmap_atomic(kaddr, KM_USER0); |
577 | |||
578 | return rv; | ||
579 | } | 594 | } |
580 | 595 | ||
581 | /** | 596 | static void gfs2_write_blocks(struct gfs2_sbd *sdp, struct buffer_head *bh, |
582 | * databuf_lo_before_commit - Scan the data buffers, writing as we go | 597 | struct list_head *list, struct list_head *done, |
583 | * | 598 | unsigned int n) |
584 | */ | ||
585 | |||
586 | static void databuf_lo_before_commit(struct gfs2_sbd *sdp) | ||
587 | { | 599 | { |
588 | struct gfs2_bufdata *bd1 = NULL, *bd2; | 600 | struct buffer_head *bh1; |
589 | struct buffer_head *bh = NULL,*bh1 = NULL; | ||
590 | struct gfs2_log_descriptor *ld; | 601 | struct gfs2_log_descriptor *ld; |
591 | unsigned int limit; | 602 | struct gfs2_bufdata *bd; |
592 | unsigned int total; | 603 | __be64 *ptr; |
593 | unsigned int num, n; | ||
594 | __be64 *ptr = NULL; | ||
595 | int magic; | ||
596 | 604 | ||
605 | if (!bh) | ||
606 | return; | ||
597 | 607 | ||
598 | limit = databuf_limit(sdp); | 608 | ld = bh_log_desc(bh); |
609 | ld->ld_length = cpu_to_be32(n + 1); | ||
610 | ld->ld_data1 = cpu_to_be32(n); | ||
599 | 611 | ||
612 | ptr = bh_log_ptr(bh); | ||
613 | |||
614 | get_bh(bh); | ||
615 | submit_bh(WRITE, bh); | ||
600 | gfs2_log_lock(sdp); | 616 | gfs2_log_lock(sdp); |
601 | total = sdp->sd_log_num_databuf; | 617 | while(!list_empty(list)) { |
602 | bd2 = bd1 = list_prepare_entry(bd1, &sdp->sd_log_le_databuf, | 618 | bd = list_entry(list->next, struct gfs2_bufdata, bd_le.le_list); |
603 | bd_le.le_list); | 619 | list_move_tail(&bd->bd_le.le_list, done); |
604 | while(total) { | 620 | get_bh(bd->bd_bh); |
605 | num = total; | 621 | while (be64_to_cpu(*ptr) != bd->bd_bh->b_blocknr) { |
606 | if (num > limit) | 622 | gfs2_log_incr_head(sdp); |
607 | num = limit; | 623 | ptr += 2; |
608 | |||
609 | gfs2_log_unlock(sdp); | ||
610 | bh = gfs2_log_get_buf(sdp); | ||
611 | gfs2_log_lock(sdp); | ||
612 | |||
613 | ld = (struct gfs2_log_descriptor *)bh->b_data; | ||
614 | ptr = (__be64 *)(bh->b_data + DATABUF_OFFSET); | ||
615 | ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC); | ||
616 | ld->ld_header.mh_type = cpu_to_be32(GFS2_METATYPE_LD); | ||
617 | ld->ld_header.mh_format = cpu_to_be32(GFS2_FORMAT_LD); | ||
618 | ld->ld_type = cpu_to_be32(GFS2_LOG_DESC_JDATA); | ||
619 | ld->ld_length = cpu_to_be32(num + 1); | ||
620 | ld->ld_data1 = cpu_to_be32(num); | ||
621 | ld->ld_data2 = cpu_to_be32(0); | ||
622 | memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved)); | ||
623 | |||
624 | n = 0; | ||
625 | list_for_each_entry_continue(bd1, &sdp->sd_log_le_databuf, | ||
626 | bd_le.le_list) { | ||
627 | bh1 = bd1->bd_bh; | ||
628 | |||
629 | magic = gfs2_check_magic(bh1); | ||
630 | *ptr++ = cpu_to_be64(bh1->b_blocknr); | ||
631 | *ptr++ = cpu_to_be64((__u64)magic); | ||
632 | clear_buffer_escaped(bh1); | ||
633 | if (unlikely(magic != 0)) | ||
634 | set_buffer_escaped(bh1); | ||
635 | if (++n >= num) | ||
636 | break; | ||
637 | } | 624 | } |
638 | gfs2_log_unlock(sdp); | 625 | gfs2_log_unlock(sdp); |
639 | if (bh) { | 626 | lock_buffer(bd->bd_bh); |
640 | set_buffer_dirty(bh); | 627 | if (buffer_escaped(bd->bd_bh)) { |
641 | ll_rw_block(WRITE, 1, &bh); | 628 | void *kaddr; |
642 | bh = NULL; | 629 | bh1 = gfs2_log_get_buf(sdp); |
643 | ptr = NULL; | 630 | kaddr = kmap_atomic(bd->bd_bh->b_page, KM_USER0); |
631 | memcpy(bh1->b_data, kaddr + bh_offset(bd->bd_bh), | ||
632 | bh1->b_size); | ||
633 | kunmap_atomic(kaddr, KM_USER0); | ||
634 | *(__be32 *)bh1->b_data = 0; | ||
635 | clear_buffer_escaped(bd->bd_bh); | ||
636 | unlock_buffer(bd->bd_bh); | ||
637 | brelse(bd->bd_bh); | ||
638 | } else { | ||
639 | bh1 = gfs2_log_fake_buf(sdp, bd->bd_bh); | ||
644 | } | 640 | } |
645 | n = 0; | 641 | submit_bh(WRITE, bh1); |
646 | gfs2_log_lock(sdp); | 642 | gfs2_log_lock(sdp); |
647 | list_for_each_entry_continue(bd2, &sdp->sd_log_le_databuf, | 643 | ptr += 2; |
648 | bd_le.le_list) { | 644 | } |
649 | if (!bd2->bd_bh) | 645 | gfs2_log_unlock(sdp); |
650 | continue; | 646 | brelse(bh); |
651 | /* copy buffer if it needs escaping */ | 647 | } |
648 | |||
649 | /** | ||
650 | * databuf_lo_before_commit - Scan the data buffers, writing as we go | ||
651 | * | ||
652 | */ | ||
653 | |||
654 | static void databuf_lo_before_commit(struct gfs2_sbd *sdp) | ||
655 | { | ||
656 | struct gfs2_bufdata *bd = NULL; | ||
657 | struct buffer_head *bh = NULL; | ||
658 | unsigned int n = 0; | ||
659 | __be64 *ptr = NULL, *end = NULL; | ||
660 | LIST_HEAD(processed); | ||
661 | LIST_HEAD(in_progress); | ||
662 | |||
663 | gfs2_log_lock(sdp); | ||
664 | while (!list_empty(&sdp->sd_log_le_databuf)) { | ||
665 | if (ptr == end) { | ||
652 | gfs2_log_unlock(sdp); | 666 | gfs2_log_unlock(sdp); |
653 | if (unlikely(buffer_escaped(bd2->bd_bh))) { | 667 | gfs2_write_blocks(sdp, bh, &in_progress, &processed, n); |
654 | void *kaddr; | 668 | n = 0; |
655 | struct page *page = bd2->bd_bh->b_page; | 669 | bh = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_JDATA); |
656 | bh = gfs2_log_get_buf(sdp); | 670 | ptr = bh_log_ptr(bh); |
657 | kaddr = kmap_atomic(page, KM_USER0); | 671 | end = bh_ptr_end(bh) - 1; |
658 | memcpy(bh->b_data, | ||
659 | kaddr + bh_offset(bd2->bd_bh), | ||
660 | sdp->sd_sb.sb_bsize); | ||
661 | kunmap_atomic(kaddr, KM_USER0); | ||
662 | *(__be32 *)bh->b_data = 0; | ||
663 | } else { | ||
664 | bh = gfs2_log_fake_buf(sdp, bd2->bd_bh); | ||
665 | } | ||
666 | set_buffer_dirty(bh); | ||
667 | ll_rw_block(WRITE, 1, &bh); | ||
668 | gfs2_log_lock(sdp); | 672 | gfs2_log_lock(sdp); |
669 | if (++n >= num) | 673 | continue; |
670 | break; | ||
671 | } | 674 | } |
672 | bh = NULL; | 675 | bd = list_entry(sdp->sd_log_le_databuf.next, struct gfs2_bufdata, bd_le.le_list); |
673 | BUG_ON(total < num); | 676 | list_move_tail(&bd->bd_le.le_list, &in_progress); |
674 | total -= num; | 677 | gfs2_check_magic(bd->bd_bh); |
678 | *ptr++ = cpu_to_be64(bd->bd_bh->b_blocknr); | ||
679 | *ptr++ = cpu_to_be64(buffer_escaped(bh) ? 1 : 0); | ||
680 | n++; | ||
675 | } | 681 | } |
676 | gfs2_log_unlock(sdp); | 682 | gfs2_log_unlock(sdp); |
683 | gfs2_write_blocks(sdp, bh, &in_progress, &processed, n); | ||
684 | gfs2_log_lock(sdp); | ||
685 | list_splice(&processed, &sdp->sd_log_le_databuf); | ||
686 | gfs2_log_unlock(sdp); | ||
677 | } | 687 | } |
678 | 688 | ||
679 | static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, | 689 | static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, |
@@ -807,10 +817,10 @@ const struct gfs2_log_operations gfs2_databuf_lops = { | |||
807 | 817 | ||
808 | const struct gfs2_log_operations *gfs2_log_ops[] = { | 818 | const struct gfs2_log_operations *gfs2_log_ops[] = { |
809 | &gfs2_glock_lops, | 819 | &gfs2_glock_lops, |
820 | &gfs2_databuf_lops, | ||
810 | &gfs2_buf_lops, | 821 | &gfs2_buf_lops, |
811 | &gfs2_revoke_lops, | ||
812 | &gfs2_rg_lops, | 822 | &gfs2_rg_lops, |
813 | &gfs2_databuf_lops, | 823 | &gfs2_revoke_lops, |
814 | NULL, | 824 | NULL, |
815 | }; | 825 | }; |
816 | 826 | ||