author     Mikulas Patocka <mpatocka@redhat.com>    2017-04-30 17:31:22 -0400
committer  Mike Snitzer <snitzer@redhat.com>        2017-08-28 11:47:17 -0400
commit     1e3b21c6fb671a5ce9d77a05a8bde805d8908467
tree       58617942ff3b8497c396750161e8b29416e970c2
parent     dc6364b5170dc446fca076d6523aaebc339d6511
dm integrity: optimize writing dm-bufio buffers that are partially changed
Rather than write the entire dm-bufio buffer when only a subset is
changed, improve dm-bufio (and dm-integrity) by only writing the subset
of the buffer that changed.
Update dm-integrity to make use of dm-bufio's new
dm_bufio_mark_partial_buffer_dirty() interface.
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
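
The change enables the following caller pattern: modify a byte range inside a cached block, then mark only that range dirty so writeback can issue a smaller write. A minimal sketch of such a caller, with the helper name and surrounding error handling assumed rather than taken from this patch:

#include <linux/err.h>
#include <linux/string.h>
#include "dm-bufio.h"

/* Illustrative only: helper name is hypothetical, not part of this patch. */
static int write_partial_metadata(struct dm_bufio_client *c, sector_t block,
				  unsigned offset, const void *src, unsigned len)
{
	struct dm_buffer *b;
	void *data;

	data = dm_bufio_read(c, block, &b);	/* read, or return the cached block */
	if (IS_ERR(data))
		return PTR_ERR(data);

	memcpy((char *)data + offset, src, len);
	/* Only [offset, offset + len) changed; schedule just that range. */
	dm_bufio_mark_partial_buffer_dirty(b, offset, offset + len);
	dm_bufio_release(b);
	return 0;
}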
-rw-r--r--  drivers/md/dm-bufio.c     | 95
-rw-r--r--  drivers/md/dm-bufio.h     |  9
-rw-r--r--  drivers/md/dm-integrity.c |  2
3 files changed, 77 insertions, 29 deletions
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index 44f4a8ac95bd..94e050b395df 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -64,6 +64,12 @@
 #define DM_BUFIO_BLOCK_SIZE_GFP_LIMIT	(PAGE_SIZE << (MAX_ORDER - 1))
 
 /*
+ * Align buffer writes to this boundary.
+ * Tests show that SSDs have the highest IOPS when using 4k writes.
+ */
+#define DM_BUFIO_WRITE_ALIGN		4096
+
+/*
  * dm_buffer->list_mode
  */
 #define LIST_CLEAN	0
@@ -149,6 +155,10 @@ struct dm_buffer {
 	blk_status_t write_error;
 	unsigned long state;
 	unsigned long last_accessed;
+	unsigned dirty_start;
+	unsigned dirty_end;
+	unsigned write_start;
+	unsigned write_end;
 	struct dm_bufio_client *c;
 	struct list_head write_list;
 	struct bio bio;
@@ -560,7 +570,7 @@ static void dmio_complete(unsigned long error, void *context)
 }
 
 static void use_dmio(struct dm_buffer *b, int rw, sector_t sector,
-		     unsigned n_sectors, bio_end_io_t *end_io)
+		     unsigned n_sectors, unsigned offset, bio_end_io_t *end_io)
 {
 	int r;
 	struct dm_io_request io_req = {
@@ -578,10 +588,10 @@ static void use_dmio(struct dm_buffer *b, int rw, sector_t sector,
 
 	if (b->data_mode != DATA_MODE_VMALLOC) {
 		io_req.mem.type = DM_IO_KMEM;
-		io_req.mem.ptr.addr = b->data;
+		io_req.mem.ptr.addr = (char *)b->data + offset;
 	} else {
 		io_req.mem.type = DM_IO_VMA;
-		io_req.mem.ptr.vma = b->data;
+		io_req.mem.ptr.vma = (char *)b->data + offset;
 	}
 
 	b->bio.bi_end_io = end_io;
@@ -609,10 +619,10 @@ static void inline_endio(struct bio *bio)
 }
 
 static void use_inline_bio(struct dm_buffer *b, int rw, sector_t sector,
-			   unsigned n_sectors, bio_end_io_t *end_io)
+			   unsigned n_sectors, unsigned offset, bio_end_io_t *end_io)
 {
 	char *ptr;
-	int len;
+	unsigned len;
 
 	bio_init(&b->bio, b->bio_vec, DM_BUFIO_INLINE_VECS);
 	b->bio.bi_iter.bi_sector = sector;
@@ -625,29 +635,20 @@ static void use_inline_bio(struct dm_buffer *b, int rw, sector_t sector,
 	b->bio.bi_private = end_io;
 	bio_set_op_attrs(&b->bio, rw, 0);
 
-	/*
-	 * We assume that if len >= PAGE_SIZE ptr is page-aligned.
-	 * If len < PAGE_SIZE the buffer doesn't cross page boundary.
-	 */
-	ptr = b->data;
+	ptr = (char *)b->data + offset;
 	len = n_sectors << SECTOR_SHIFT;
 
-	if (len >= PAGE_SIZE)
-		BUG_ON((unsigned long)ptr & (PAGE_SIZE - 1));
-	else
-		BUG_ON((unsigned long)ptr & (len - 1));
-
 	do {
-		if (!bio_add_page(&b->bio, virt_to_page(ptr),
-				  len < PAGE_SIZE ? len : PAGE_SIZE,
+		unsigned this_step = min((unsigned)(PAGE_SIZE - offset_in_page(ptr)), len);
+		if (!bio_add_page(&b->bio, virt_to_page(ptr), this_step,
 				  offset_in_page(ptr))) {
 			BUG_ON(b->c->block_size <= PAGE_SIZE);
-			use_dmio(b, rw, sector, n_sectors, end_io);
+			use_dmio(b, rw, sector, n_sectors, offset, end_io);
 			return;
 		}
 
-		len -= PAGE_SIZE;
-		ptr += PAGE_SIZE;
+		len -= this_step;
+		ptr += this_step;
 	} while (len > 0);
 
 	submit_bio(&b->bio);
@@ -657,18 +658,33 @@ static void submit_io(struct dm_buffer *b, int rw, bio_end_io_t *end_io)
 {
 	unsigned n_sectors;
 	sector_t sector;
-
-	if (rw == WRITE && b->c->write_callback)
-		b->c->write_callback(b);
+	unsigned offset, end;
 
 	sector = (b->block << b->c->sectors_per_block_bits) + b->c->start;
-	n_sectors = 1 << b->c->sectors_per_block_bits;
+
+	if (rw != WRITE) {
+		n_sectors = 1 << b->c->sectors_per_block_bits;
+		offset = 0;
+	} else {
+		if (b->c->write_callback)
+			b->c->write_callback(b);
+		offset = b->write_start;
+		end = b->write_end;
+		offset &= -DM_BUFIO_WRITE_ALIGN;
+		end += DM_BUFIO_WRITE_ALIGN - 1;
+		end &= -DM_BUFIO_WRITE_ALIGN;
+		if (unlikely(end > b->c->block_size))
+			end = b->c->block_size;
+
+		sector += offset >> SECTOR_SHIFT;
+		n_sectors = (end - offset) >> SECTOR_SHIFT;
+	}
 
 	if (n_sectors <= ((DM_BUFIO_INLINE_VECS * PAGE_SIZE) >> SECTOR_SHIFT) &&
 	    b->data_mode != DATA_MODE_VMALLOC)
-		use_inline_bio(b, rw, sector, n_sectors, end_io);
+		use_inline_bio(b, rw, sector, n_sectors, offset, end_io);
 	else
-		use_dmio(b, rw, sector, n_sectors, end_io);
+		use_dmio(b, rw, sector, n_sectors, offset, end_io);
 }
 
 /*----------------------------------------------------------------
@@ -720,6 +736,9 @@ static void __write_dirty_buffer(struct dm_buffer *b,
 	clear_bit(B_DIRTY, &b->state);
 	wait_on_bit_lock_io(&b->state, B_WRITING, TASK_UNINTERRUPTIBLE);
 
+	b->write_start = b->dirty_start;
+	b->write_end = b->dirty_end;
+
 	if (!write_list)
 		submit_io(b, WRITE, write_endio);
 	else
@@ -1221,19 +1240,37 @@ void dm_bufio_release(struct dm_buffer *b)
 }
 EXPORT_SYMBOL_GPL(dm_bufio_release);
 
-void dm_bufio_mark_buffer_dirty(struct dm_buffer *b)
+void dm_bufio_mark_partial_buffer_dirty(struct dm_buffer *b,
+					unsigned start, unsigned end)
 {
 	struct dm_bufio_client *c = b->c;
 
+	BUG_ON(start >= end);
+	BUG_ON(end > b->c->block_size);
+
 	dm_bufio_lock(c);
 
 	BUG_ON(test_bit(B_READING, &b->state));
 
-	if (!test_and_set_bit(B_DIRTY, &b->state))
+	if (!test_and_set_bit(B_DIRTY, &b->state)) {
+		b->dirty_start = start;
+		b->dirty_end = end;
 		__relink_lru(b, LIST_DIRTY);
+	} else {
+		if (start < b->dirty_start)
+			b->dirty_start = start;
+		if (end > b->dirty_end)
+			b->dirty_end = end;
+	}
 
 	dm_bufio_unlock(c);
 }
+EXPORT_SYMBOL_GPL(dm_bufio_mark_partial_buffer_dirty);
+
+void dm_bufio_mark_buffer_dirty(struct dm_buffer *b)
+{
+	dm_bufio_mark_partial_buffer_dirty(b, 0, b->c->block_size);
+}
 EXPORT_SYMBOL_GPL(dm_bufio_mark_buffer_dirty);
 
 void dm_bufio_write_dirty_buffers_async(struct dm_bufio_client *c)
@@ -1398,6 +1435,8 @@ retry:
 		wait_on_bit_io(&b->state, B_WRITING,
 			       TASK_UNINTERRUPTIBLE);
 		set_bit(B_DIRTY, &b->state);
+		b->dirty_start = 0;
+		b->dirty_end = c->block_size;
 		__unlink_buffer(b);
 		__link_buffer(b, new_block, LIST_DIRTY);
 	} else {
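
The rounding in submit_io() above widens the dirty byte range out to DM_BUFIO_WRITE_ALIGN boundaries before converting it to a sector count. A standalone userspace sketch of the same arithmetic, with the example values assumed for illustration:

#include <stdio.h>

#define DM_BUFIO_WRITE_ALIGN	4096U
#define SECTOR_SHIFT		9

int main(void)
{
	unsigned block_size = 65536;		/* example buffer size (assumed) */
	unsigned offset = 5000, end = 9000;	/* example dirty byte range (assumed) */

	offset &= -DM_BUFIO_WRITE_ALIGN;	/* round down to 4k: 4096 */
	end += DM_BUFIO_WRITE_ALIGN - 1;
	end &= -DM_BUFIO_WRITE_ALIGN;		/* round up to 4k: 12288 */
	if (end > block_size)
		end = block_size;

	/* prints: write 16 sectors starting at byte offset 4096 */
	printf("write %u sectors starting at byte offset %u\n",
	       (end - offset) >> SECTOR_SHIFT, offset);
	return 0;
}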
diff --git a/drivers/md/dm-bufio.h b/drivers/md/dm-bufio.h
index b6d8f53ec15b..be732d3f8611 100644
--- a/drivers/md/dm-bufio.h
+++ b/drivers/md/dm-bufio.h
@@ -94,6 +94,15 @@ void dm_bufio_release(struct dm_buffer *b);
 void dm_bufio_mark_buffer_dirty(struct dm_buffer *b);
 
 /*
+ * Mark a part of the buffer dirty.
+ *
+ * The specified part of the buffer is scheduled to be written. dm-bufio may
+ * write the specified part of the buffer or it may write a larger superset.
+ */
+void dm_bufio_mark_partial_buffer_dirty(struct dm_buffer *b,
+					unsigned start, unsigned end);
+
+/*
  * Initiate writing of dirty buffers, without waiting for completion.
  */
 void dm_bufio_write_dirty_buffers_async(struct dm_bufio_client *c);
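
The "larger superset" wording in the header comment follows from the implementation above: a buffer tracks only one dirty range, so marking two disjoint parts merges them, and submit_io() additionally rounds the range out to 4k boundaries. A simplified standalone sketch of the merge rule (struct and function names are illustrative, not from the patch):

/*
 * Sketch only: shows how successive partial marks on an already-dirty
 * buffer collapse into a single [dirty_start, dirty_end) range.
 */
struct dirty_range {
	unsigned dirty_start;
	unsigned dirty_end;
	int dirty;
};

static void mark_partial_dirty(struct dirty_range *r, unsigned start, unsigned end)
{
	if (!r->dirty) {
		r->dirty = 1;
		r->dirty_start = start;
		r->dirty_end = end;
	} else {
		if (start < r->dirty_start)
			r->dirty_start = start;
		if (end > r->dirty_end)
			r->dirty_end = end;
	}
}

/* e.g. marking [512, 1024) and then [8192, 8704) yields one range [512, 8704),
 * so the eventual write may cover bytes that never changed in between. */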
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index 3acce09bba35..689f89d8eeef 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -1040,7 +1040,7 @@ static int dm_integrity_rw_tag(struct dm_integrity_c *ic, unsigned char *tag, se
 			memcpy(tag, dp, to_copy);
 		} else if (op == TAG_WRITE) {
 			memcpy(dp, tag, to_copy);
-			dm_bufio_mark_buffer_dirty(b);
+			dm_bufio_mark_partial_buffer_dirty(b, *metadata_offset, *metadata_offset + to_copy);
 		} else {
 			/* e.g.: op == TAG_CMP */
 			if (unlikely(memcmp(dp, tag, to_copy))) {