about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Mikulas Patocka <mpatocka@redhat.com> 2019-04-29 08:57:24 -0400
committer: Mike Snitzer <snitzer@redhat.com> 2019-05-08 13:41:58 -0400
commit: 468dfca38b1a6fbdccd195d875599cb7c8875cd9 (patch)
tree: 607472ff36499f4d1203fd12b74654c7ae7aba76
parent: 8b3bbd490d880db1377c71daf9c929c8446c8375 (diff)
dm integrity: add a bitmap mode
Introduce an alternate mode of operation where dm-integrity uses a bitmap
instead of a journal. If a bit in the bitmap is 1, the corresponding
region's data and integrity tags are not synchronized - if the machine
crashes, the unsynchronized regions will be recalculated. The bitmap mode
is faster than the journal mode, because we don't have to write the data
twice, but it is also less reliable, because if data corruption happens
when the machine crashes, it may not be detected.

Benchmark results for an SSD connected to a SATA300 port, when doing large
linear writes with dd:

buffered I/O:
	raw device throughput - 245MB/s
	dm-integrity with journaling - 120MB/s
	dm-integrity with bitmap - 238MB/s

direct I/O with 1MB block size:
	raw device throughput - 248MB/s
	dm-integrity with journaling - 123MB/s
	dm-integrity with bitmap - 223MB/s

For more info see dm-integrity in Documentation/device-mapper/

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
-rw-r--r-- Documentation/device-mapper/dm-integrity.txt 22
-rw-r--r-- drivers/md/dm-integrity.c 536
2 files changed, 525 insertions, 33 deletions
diff --git a/Documentation/device-mapper/dm-integrity.txt b/Documentation/device-mapper/dm-integrity.txt
index 7dc9180cdeac..d63d78ffeb73 100644
--- a/Documentation/device-mapper/dm-integrity.txt
+++ b/Documentation/device-mapper/dm-integrity.txt
@@ -21,6 +21,13 @@ mode it calculates and verifies the integrity tag internally. In this
21mode, the dm-integrity target can be used to detect silent data 21mode, the dm-integrity target can be used to detect silent data
22corruption on the disk or in the I/O path. 22corruption on the disk or in the I/O path.
23 23
24There's an alternate mode of operation where dm-integrity uses a bitmap
25instead of a journal. If a bit in the bitmap is 1, the corresponding
26region's data and integrity tags are not synchronized - if the machine
27crashes, the unsynchronized regions will be recalculated. The bitmap mode
28is faster than the journal mode, because we don't have to write the data
29twice, but it is also less reliable, because if data corruption happens
30when the machine crashes, it may not be detected.
24 31
25When loading the target for the first time, the kernel driver will format 32When loading the target for the first time, the kernel driver will format
26the device. But it will only format the device if the superblock contains 33the device. But it will only format the device if the superblock contains
@@ -59,6 +66,10 @@ Target arguments:
59 either both data and tag or none of them are written. The 66 either both data and tag or none of them are written. The
60 journaled mode degrades write throughput twice because the 67 journaled mode degrades write throughput twice because the
61 data have to be written twice. 68 data have to be written twice.
69 B - bitmap mode - data and metadata are written without any
70 synchronization, the driver maintains a bitmap of dirty
71 regions where data and metadata don't match. This mode can
72 only be used with internal hash.
62 R - recovery mode - in this mode, journal is not replayed, 73 R - recovery mode - in this mode, journal is not replayed,
63 checksums are not checked and writes to the device are not 74 checksums are not checked and writes to the device are not
64 allowed. This mode is useful for data recovery if the 75 allowed. This mode is useful for data recovery if the
@@ -150,6 +161,15 @@ block_size:number
150 Supported values are 512, 1024, 2048 and 4096 bytes. If not 161 Supported values are 512, 1024, 2048 and 4096 bytes. If not
151 specified the default block size is 512 bytes. 162 specified the default block size is 512 bytes.
152 163
164sectors_per_bit:number
165 In the bitmap mode, this parameter specifies the number of
166 512-byte sectors that corresponds to one bitmap bit.
167
168bitmap_flush_interval:number
169 The bitmap flush interval in milliseconds. The metadata buffers
170 are synchronized when this interval expires.
171
172
153The journal mode (D/J), buffer_sectors, journal_watermark, commit_time can 173The journal mode (D/J), buffer_sectors, journal_watermark, commit_time can
154be changed when reloading the target (load an inactive table and swap the 174be changed when reloading the target (load an inactive table and swap the
155tables with suspend and resume). The other arguments should not be changed 175tables with suspend and resume). The other arguments should not be changed
@@ -174,6 +194,8 @@ The layout of the formatted block device:
174 * flags 194 * flags
175 SB_FLAG_HAVE_JOURNAL_MAC - a flag is set if journal_mac is used 195 SB_FLAG_HAVE_JOURNAL_MAC - a flag is set if journal_mac is used
176 SB_FLAG_RECALCULATING - recalculating is in progress 196 SB_FLAG_RECALCULATING - recalculating is in progress
197 SB_FLAG_DIRTY_BITMAP - journal area contains the bitmap of dirty
198 blocks
177 * log2(sectors per block) 199 * log2(sectors per block)
178 * a position where recalculating finished 200 * a position where recalculating finished
179* journal 201* journal
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index fb8935d80842..54b3fe1403a8 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -24,6 +24,7 @@
24 24
25#define DEFAULT_INTERLEAVE_SECTORS 32768 25#define DEFAULT_INTERLEAVE_SECTORS 32768
26#define DEFAULT_JOURNAL_SIZE_FACTOR 7 26#define DEFAULT_JOURNAL_SIZE_FACTOR 7
27#define DEFAULT_SECTORS_PER_BITMAP_BIT 32768
27#define DEFAULT_BUFFER_SECTORS 128 28#define DEFAULT_BUFFER_SECTORS 128
28#define DEFAULT_JOURNAL_WATERMARK 50 29#define DEFAULT_JOURNAL_WATERMARK 50
29#define DEFAULT_SYNC_MSEC 10000 30#define DEFAULT_SYNC_MSEC 10000
@@ -33,6 +34,8 @@
33#define METADATA_WORKQUEUE_MAX_ACTIVE 16 34#define METADATA_WORKQUEUE_MAX_ACTIVE 16
34#define RECALC_SECTORS 8192 35#define RECALC_SECTORS 8192
35#define RECALC_WRITE_SUPER 16 36#define RECALC_WRITE_SUPER 16
37#define BITMAP_BLOCK_SIZE 4096 /* don't change it */
38#define BITMAP_FLUSH_INTERVAL (10 * HZ)
36 39
37/* 40/*
38 * Warning - DEBUG_PRINT prints security-sensitive data to the log, 41 * Warning - DEBUG_PRINT prints security-sensitive data to the log,
@@ -48,6 +51,7 @@
48#define SB_MAGIC "integrt" 51#define SB_MAGIC "integrt"
49#define SB_VERSION_1 1 52#define SB_VERSION_1 1
50#define SB_VERSION_2 2 53#define SB_VERSION_2 2
54#define SB_VERSION_3 3
51#define SB_SECTORS 8 55#define SB_SECTORS 8
52#define MAX_SECTORS_PER_BLOCK 8 56#define MAX_SECTORS_PER_BLOCK 8
53 57
@@ -60,12 +64,14 @@ struct superblock {
60 __u64 provided_data_sectors; /* userspace uses this value */ 64 __u64 provided_data_sectors; /* userspace uses this value */
61 __u32 flags; 65 __u32 flags;
62 __u8 log2_sectors_per_block; 66 __u8 log2_sectors_per_block;
63 __u8 pad[3]; 67 __u8 log2_blocks_per_bitmap_bit;
68 __u8 pad[2];
64 __u64 recalc_sector; 69 __u64 recalc_sector;
65}; 70};
66 71
67#define SB_FLAG_HAVE_JOURNAL_MAC 0x1 72#define SB_FLAG_HAVE_JOURNAL_MAC 0x1
68#define SB_FLAG_RECALCULATING 0x2 73#define SB_FLAG_RECALCULATING 0x2
74#define SB_FLAG_DIRTY_BITMAP 0x4
69 75
70#define JOURNAL_ENTRY_ROUNDUP 8 76#define JOURNAL_ENTRY_ROUNDUP 8
71 77
@@ -155,9 +161,16 @@ struct dm_integrity_c {
155 struct workqueue_struct *metadata_wq; 161 struct workqueue_struct *metadata_wq;
156 struct superblock *sb; 162 struct superblock *sb;
157 unsigned journal_pages; 163 unsigned journal_pages;
164 unsigned n_bitmap_blocks;
165
158 struct page_list *journal; 166 struct page_list *journal;
159 struct page_list *journal_io; 167 struct page_list *journal_io;
160 struct page_list *journal_xor; 168 struct page_list *journal_xor;
169 struct page_list *recalc_bitmap;
170 struct page_list *may_write_bitmap;
171 struct bitmap_block_status *bbs;
172 unsigned bitmap_flush_interval;
173 struct delayed_work bitmap_flush_work;
161 174
162 struct crypto_skcipher *journal_crypt; 175 struct crypto_skcipher *journal_crypt;
163 struct scatterlist **journal_scatterlist; 176 struct scatterlist **journal_scatterlist;
@@ -184,6 +197,7 @@ struct dm_integrity_c {
184 __s8 log2_metadata_run; 197 __s8 log2_metadata_run;
185 __u8 log2_buffer_sectors; 198 __u8 log2_buffer_sectors;
186 __u8 sectors_per_block; 199 __u8 sectors_per_block;
200 __u8 log2_blocks_per_bitmap_bit;
187 201
188 unsigned char mode; 202 unsigned char mode;
189 int suspending; 203 int suspending;
@@ -236,6 +250,7 @@ struct dm_integrity_c {
236 250
237 bool journal_uptodate; 251 bool journal_uptodate;
238 bool just_formatted; 252 bool just_formatted;
253 bool recalculate_flag;
239 254
240 struct alg_spec internal_hash_alg; 255 struct alg_spec internal_hash_alg;
241 struct alg_spec journal_crypt_alg; 256 struct alg_spec journal_crypt_alg;
@@ -292,6 +307,16 @@ struct journal_io {
292 struct journal_completion *comp; 307 struct journal_completion *comp;
293}; 308};
294 309
310struct bitmap_block_status {
311 struct work_struct work;
312 struct dm_integrity_c *ic;
313 unsigned idx;
314 unsigned long *bitmap;
315 struct bio_list bio_queue;
316 spinlock_t bio_queue_lock;
317
318};
319
295static struct kmem_cache *journal_io_cache; 320static struct kmem_cache *journal_io_cache;
296 321
297#define JOURNAL_IO_MEMPOOL 32 322#define JOURNAL_IO_MEMPOOL 32
@@ -427,7 +452,9 @@ static void wraparound_section(struct dm_integrity_c *ic, unsigned *sec_ptr)
427 452
428static void sb_set_version(struct dm_integrity_c *ic) 453static void sb_set_version(struct dm_integrity_c *ic)
429{ 454{
430 if (ic->meta_dev || ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) 455 if (ic->mode == 'B' || ic->sb->flags & cpu_to_le32(SB_FLAG_DIRTY_BITMAP))
456 ic->sb->version = SB_VERSION_3;
457 else if (ic->meta_dev || ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING))
431 ic->sb->version = SB_VERSION_2; 458 ic->sb->version = SB_VERSION_2;
432 else 459 else
433 ic->sb->version = SB_VERSION_1; 460 ic->sb->version = SB_VERSION_1;
@@ -451,6 +478,135 @@ static int sync_rw_sb(struct dm_integrity_c *ic, int op, int op_flags)
451 return dm_io(&io_req, 1, &io_loc, NULL); 478 return dm_io(&io_req, 1, &io_loc, NULL);
452} 479}
453 480
481#define BITMAP_OP_TEST_ALL_SET 0
482#define BITMAP_OP_TEST_ALL_CLEAR 1
483#define BITMAP_OP_SET 2
484#define BITMAP_OP_CLEAR 3
485
486static bool block_bitmap_op(struct dm_integrity_c *ic, struct page_list *bitmap, sector_t sector, sector_t n_sectors, int mode)
487{
488 unsigned long bit, end_bit, this_end_bit, page, end_page;
489 unsigned long *data;
490
491 if (unlikely(((sector | n_sectors) & ((1 << ic->sb->log2_sectors_per_block) - 1)) != 0)) {
492 DMCRIT("invalid bitmap access (%llx,%llx,%d,%d,%d)\n",
493 (unsigned long long)sector,
494 (unsigned long long)n_sectors,
495 ic->sb->log2_sectors_per_block,
496 ic->log2_blocks_per_bitmap_bit,
497 mode);
498 BUG();
499 }
500
501 if (unlikely(!n_sectors))
502 return true;
503
504 bit = sector >> (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit);
505 end_bit = (sector + n_sectors - 1) >> (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit);
506
507 page = bit / (PAGE_SIZE * 8);
508 bit %= PAGE_SIZE * 8;
509
510 end_page = end_bit / (PAGE_SIZE * 8);
511 end_bit %= PAGE_SIZE * 8;
512
513repeat:
514 if (page < end_page) {
515 this_end_bit = PAGE_SIZE * 8 - 1;
516 } else {
517 this_end_bit = end_bit;
518 }
519
520 data = lowmem_page_address(bitmap[page].page);
521
522 if (mode == BITMAP_OP_TEST_ALL_SET) {
523 while (bit <= this_end_bit) {
524 if (!(bit % BITS_PER_LONG) && this_end_bit >= bit + BITS_PER_LONG - 1) {
525 do {
526 if (data[bit / BITS_PER_LONG] != -1)
527 return false;
528 bit += BITS_PER_LONG;
529 } while (this_end_bit >= bit + BITS_PER_LONG - 1);
530 continue;
531 }
532 if (!test_bit(bit, data))
533 return false;
534 bit++;
535 }
536 } else if (mode == BITMAP_OP_TEST_ALL_CLEAR) {
537 while (bit <= this_end_bit) {
538 if (!(bit % BITS_PER_LONG) && this_end_bit >= bit + BITS_PER_LONG - 1) {
539 do {
540 if (data[bit / BITS_PER_LONG] != 0)
541 return false;
542 bit += BITS_PER_LONG;
543 } while (this_end_bit >= bit + BITS_PER_LONG - 1);
544 continue;
545 }
546 if (test_bit(bit, data))
547 return false;
548 bit++;
549 }
550 } else if (mode == BITMAP_OP_SET) {
551 while (bit <= this_end_bit) {
552 if (!(bit % BITS_PER_LONG) && this_end_bit >= bit + BITS_PER_LONG - 1) {
553 do {
554 data[bit / BITS_PER_LONG] = -1;
555 bit += BITS_PER_LONG;
556 } while (this_end_bit >= bit + BITS_PER_LONG - 1);
557 continue;
558 }
559 __set_bit(bit, data);
560 bit++;
561 }
562 } else if (mode == BITMAP_OP_CLEAR) {
563 if (!bit && this_end_bit == PAGE_SIZE * 8 - 1)
564 clear_page(data);
565 else while (bit <= this_end_bit) {
566 if (!(bit % BITS_PER_LONG) && this_end_bit >= bit + BITS_PER_LONG - 1) {
567 do {
568 data[bit / BITS_PER_LONG] = 0;
569 bit += BITS_PER_LONG;
570 } while (this_end_bit >= bit + BITS_PER_LONG - 1);
571 continue;
572 }
573 __clear_bit(bit, data);
574 bit++;
575 }
576 } else {
577 BUG();
578 }
579
580 if (unlikely(page < end_page)) {
581 bit = 0;
582 page++;
583 goto repeat;
584 }
585
586 return true;
587}
588
589static void block_bitmap_copy(struct dm_integrity_c *ic, struct page_list *dst, struct page_list *src)
590{
591 unsigned n_bitmap_pages = DIV_ROUND_UP(ic->n_bitmap_blocks, PAGE_SIZE / BITMAP_BLOCK_SIZE);
592 unsigned i;
593
594 for (i = 0; i < n_bitmap_pages; i++) {
595 unsigned long *dst_data = lowmem_page_address(dst[i].page);
596 unsigned long *src_data = lowmem_page_address(src[i].page);
597 copy_page(dst_data, src_data);
598 }
599}
600
601static struct bitmap_block_status *sector_to_bitmap_block(struct dm_integrity_c *ic, sector_t sector)
602{
603 unsigned bit = sector >> (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit);
604 unsigned bitmap_block = bit / (BITMAP_BLOCK_SIZE * 8);
605
606 BUG_ON(bitmap_block >= ic->n_bitmap_blocks);
607 return &ic->bbs[bitmap_block];
608}
609
454static void access_journal_check(struct dm_integrity_c *ic, unsigned section, unsigned offset, 610static void access_journal_check(struct dm_integrity_c *ic, unsigned section, unsigned offset,
455 bool e, const char *function) 611 bool e, const char *function)
456{ 612{
@@ -1784,6 +1940,20 @@ offload_to_thread:
1784 goto journal_read_write; 1940 goto journal_read_write;
1785 } 1941 }
1786 1942
1943 if (ic->mode == 'B' && dio->write) {
1944 if (!block_bitmap_op(ic, ic->may_write_bitmap, dio->range.logical_sector, dio->range.n_sectors, BITMAP_OP_TEST_ALL_SET)) {
1945 struct bitmap_block_status *bbs = sector_to_bitmap_block(ic, dio->range.logical_sector);
1946
1947 spin_lock(&bbs->bio_queue_lock);
1948 bio_list_add(&bbs->bio_queue, bio);
1949 spin_unlock(&bbs->bio_queue_lock);
1950
1951 queue_work(ic->writer_wq, &bbs->work);
1952
1953 return;
1954 }
1955 }
1956
1787 dio->in_flight = (atomic_t)ATOMIC_INIT(2); 1957 dio->in_flight = (atomic_t)ATOMIC_INIT(2);
1788 1958
1789 if (need_sync_io) { 1959 if (need_sync_io) {
@@ -1810,10 +1980,14 @@ offload_to_thread:
1810 1980
1811 if (need_sync_io) { 1981 if (need_sync_io) {
1812 wait_for_completion_io(&read_comp); 1982 wait_for_completion_io(&read_comp);
1813 if (unlikely(ic->recalc_wq != NULL) && 1983 if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING) &&
1814 ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING) &&
1815 dio->range.logical_sector + dio->range.n_sectors > le64_to_cpu(ic->sb->recalc_sector)) 1984 dio->range.logical_sector + dio->range.n_sectors > le64_to_cpu(ic->sb->recalc_sector))
1816 goto skip_check; 1985 goto skip_check;
1986 if (ic->mode == 'B') {
1987 if (!block_bitmap_op(ic, ic->recalc_bitmap, dio->range.logical_sector, dio->range.n_sectors, BITMAP_OP_TEST_ALL_CLEAR))
1988 goto skip_check;
1989 }
1990
1817 if (likely(!bio->bi_status)) 1991 if (likely(!bio->bi_status))
1818 integrity_metadata(&dio->work); 1992 integrity_metadata(&dio->work);
1819 else 1993 else
@@ -1851,8 +2025,22 @@ static void pad_uncommitted(struct dm_integrity_c *ic)
1851 wraparound_section(ic, &ic->free_section); 2025 wraparound_section(ic, &ic->free_section);
1852 ic->n_uncommitted_sections++; 2026 ic->n_uncommitted_sections++;
1853 } 2027 }
1854 WARN_ON(ic->journal_sections * ic->journal_section_entries != 2028 if (WARN_ON(ic->journal_sections * ic->journal_section_entries !=
1855 (ic->n_uncommitted_sections + ic->n_committed_sections) * ic->journal_section_entries + ic->free_sectors); 2029 (ic->n_uncommitted_sections + ic->n_committed_sections) * ic->journal_section_entries + ic->free_sectors)) {
2030 printk(KERN_CRIT "dm-integrity: "
2031 "journal_sections %u, "
2032 "journal_section_entries %u, "
2033 "n_uncommitted_sections %u, "
2034 "n_committed_sections %u, "
2035 "journal_section_entries %u, "
2036 "free_sectors %u\n",
2037 ic->journal_sections,
2038 ic->journal_section_entries,
2039 ic->n_uncommitted_sections,
2040 ic->n_committed_sections,
2041 ic->journal_section_entries,
2042 ic->free_sectors);
2043 }
1856} 2044}
1857 2045
1858static void integrity_commit(struct work_struct *w) 2046static void integrity_commit(struct work_struct *w)
@@ -2139,11 +2327,14 @@ static void integrity_recalc(struct work_struct *w)
2139 sector_t area, offset; 2327 sector_t area, offset;
2140 sector_t metadata_block; 2328 sector_t metadata_block;
2141 unsigned metadata_offset; 2329 unsigned metadata_offset;
2330 sector_t logical_sector, n_sectors;
2142 __u8 *t; 2331 __u8 *t;
2143 unsigned i; 2332 unsigned i;
2144 int r; 2333 int r;
2145 unsigned super_counter = 0; 2334 unsigned super_counter = 0;
2146 2335
2336 DEBUG_print("start recalculation... (position %llx)\n", le64_to_cpu(ic->sb->recalc_sector));
2337
2147 spin_lock_irq(&ic->endio_wait.lock); 2338 spin_lock_irq(&ic->endio_wait.lock);
2148 2339
2149next_chunk: 2340next_chunk:
@@ -2152,8 +2343,13 @@ next_chunk:
2152 goto unlock_ret; 2343 goto unlock_ret;
2153 2344
2154 range.logical_sector = le64_to_cpu(ic->sb->recalc_sector); 2345 range.logical_sector = le64_to_cpu(ic->sb->recalc_sector);
2155 if (unlikely(range.logical_sector >= ic->provided_data_sectors)) 2346 if (unlikely(range.logical_sector >= ic->provided_data_sectors)) {
2347 if (ic->mode == 'B') {
2348 DEBUG_print("queue_delayed_work: bitmap_flush_work\n");
2349 queue_delayed_work(ic->commit_wq, &ic->bitmap_flush_work, 0);
2350 }
2156 goto unlock_ret; 2351 goto unlock_ret;
2352 }
2157 2353
2158 get_area_and_offset(ic, range.logical_sector, &area, &offset); 2354 get_area_and_offset(ic, range.logical_sector, &area, &offset);
2159 range.n_sectors = min((sector_t)RECALC_SECTORS, ic->provided_data_sectors - range.logical_sector); 2355 range.n_sectors = min((sector_t)RECALC_SECTORS, ic->provided_data_sectors - range.logical_sector);
@@ -2161,11 +2357,33 @@ next_chunk:
2161 range.n_sectors = min(range.n_sectors, ((sector_t)1U << ic->sb->log2_interleave_sectors) - (unsigned)offset); 2357 range.n_sectors = min(range.n_sectors, ((sector_t)1U << ic->sb->log2_interleave_sectors) - (unsigned)offset);
2162 2358
2163 add_new_range_and_wait(ic, &range); 2359 add_new_range_and_wait(ic, &range);
2164
2165 spin_unlock_irq(&ic->endio_wait.lock); 2360 spin_unlock_irq(&ic->endio_wait.lock);
2361 logical_sector = range.logical_sector;
2362 n_sectors = range.n_sectors;
2363
2364 if (ic->mode == 'B') {
2365 if (block_bitmap_op(ic, ic->recalc_bitmap, logical_sector, n_sectors, BITMAP_OP_TEST_ALL_CLEAR)) {
2366 goto advance_and_next;
2367 }
2368 while (block_bitmap_op(ic, ic->recalc_bitmap, logical_sector, ic->sectors_per_block, BITMAP_OP_TEST_ALL_CLEAR)) {
2369 logical_sector += ic->sectors_per_block;
2370 n_sectors -= ic->sectors_per_block;
2371 cond_resched();
2372 }
2373 while (block_bitmap_op(ic, ic->recalc_bitmap, logical_sector + n_sectors - ic->sectors_per_block, ic->sectors_per_block, BITMAP_OP_TEST_ALL_CLEAR)) {
2374 n_sectors -= ic->sectors_per_block;
2375 cond_resched();
2376 }
2377 get_area_and_offset(ic, logical_sector, &area, &offset);
2378 }
2379
2380 DEBUG_print("recalculating: %lx, %lx\n", logical_sector, n_sectors);
2166 2381
2167 if (unlikely(++super_counter == RECALC_WRITE_SUPER)) { 2382 if (unlikely(++super_counter == RECALC_WRITE_SUPER)) {
2168 recalc_write_super(ic); 2383 recalc_write_super(ic);
2384 if (ic->mode == 'B') {
2385 queue_delayed_work(ic->commit_wq, &ic->bitmap_flush_work, ic->bitmap_flush_interval);
2386 }
2169 super_counter = 0; 2387 super_counter = 0;
2170 } 2388 }
2171 2389
@@ -2180,7 +2398,7 @@ next_chunk:
2180 io_req.client = ic->io; 2398 io_req.client = ic->io;
2181 io_loc.bdev = ic->dev->bdev; 2399 io_loc.bdev = ic->dev->bdev;
2182 io_loc.sector = get_data_sector(ic, area, offset); 2400 io_loc.sector = get_data_sector(ic, area, offset);
2183 io_loc.count = range.n_sectors; 2401 io_loc.count = n_sectors;
2184 2402
2185 r = dm_io(&io_req, 1, &io_loc, NULL); 2403 r = dm_io(&io_req, 1, &io_loc, NULL);
2186 if (unlikely(r)) { 2404 if (unlikely(r)) {
@@ -2189,8 +2407,8 @@ next_chunk:
2189 } 2407 }
2190 2408
2191 t = ic->recalc_tags; 2409 t = ic->recalc_tags;
2192 for (i = 0; i < range.n_sectors; i += ic->sectors_per_block) { 2410 for (i = 0; i < n_sectors; i += ic->sectors_per_block) {
2193 integrity_sector_checksum(ic, range.logical_sector + i, ic->recalc_buffer + (i << SECTOR_SHIFT), t); 2411 integrity_sector_checksum(ic, logical_sector + i, ic->recalc_buffer + (i << SECTOR_SHIFT), t);
2194 t += ic->tag_size; 2412 t += ic->tag_size;
2195 } 2413 }
2196 2414
@@ -2202,6 +2420,9 @@ next_chunk:
2202 goto err; 2420 goto err;
2203 } 2421 }
2204 2422
2423advance_and_next:
2424 cond_resched();
2425
2205 spin_lock_irq(&ic->endio_wait.lock); 2426 spin_lock_irq(&ic->endio_wait.lock);
2206 remove_range_unlocked(ic, &range); 2427 remove_range_unlocked(ic, &range);
2207 ic->sb->recalc_sector = cpu_to_le64(range.logical_sector + range.n_sectors); 2428 ic->sb->recalc_sector = cpu_to_le64(range.logical_sector + range.n_sectors);
@@ -2217,6 +2438,89 @@ unlock_ret:
2217 recalc_write_super(ic); 2438 recalc_write_super(ic);
2218} 2439}
2219 2440
2441static void bitmap_block_work(struct work_struct *w)
2442{
2443 struct bitmap_block_status *bbs = container_of(w, struct bitmap_block_status, work);
2444 struct dm_integrity_c *ic = bbs->ic;
2445 struct bio *bio;
2446 struct bio_list bio_queue;
2447 struct bio_list waiting;
2448
2449 bio_list_init(&waiting);
2450
2451 spin_lock(&bbs->bio_queue_lock);
2452 bio_queue = bbs->bio_queue;
2453 bio_list_init(&bbs->bio_queue);
2454 spin_unlock(&bbs->bio_queue_lock);
2455
2456 while ((bio = bio_list_pop(&bio_queue))) {
2457 struct dm_integrity_io *dio;
2458
2459 dio = dm_per_bio_data(bio, sizeof(struct dm_integrity_io));
2460
2461 if (block_bitmap_op(ic, ic->may_write_bitmap, dio->range.logical_sector, dio->range.n_sectors, BITMAP_OP_TEST_ALL_SET)) {
2462 remove_range(ic, &dio->range);
2463 INIT_WORK(&dio->work, integrity_bio_wait);
2464 queue_work(ic->wait_wq, &dio->work);
2465 } else {
2466 block_bitmap_op(ic, ic->journal, dio->range.logical_sector, dio->range.n_sectors, BITMAP_OP_SET);
2467 bio_list_add(&waiting, bio);
2468 }
2469 }
2470
2471 if (bio_list_empty(&waiting))
2472 return;
2473
2474 rw_journal_sectors(ic, REQ_OP_WRITE, REQ_FUA | REQ_SYNC, bbs->idx * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), BITMAP_BLOCK_SIZE >> SECTOR_SHIFT, NULL);
2475
2476 while ((bio = bio_list_pop(&waiting))) {
2477 struct dm_integrity_io *dio = dm_per_bio_data(bio, sizeof(struct dm_integrity_io));
2478
2479 block_bitmap_op(ic, ic->may_write_bitmap, dio->range.logical_sector, dio->range.n_sectors, BITMAP_OP_SET);
2480
2481 remove_range(ic, &dio->range);
2482 INIT_WORK(&dio->work, integrity_bio_wait);
2483 queue_work(ic->wait_wq, &dio->work);
2484 }
2485
2486 queue_delayed_work(ic->commit_wq, &ic->bitmap_flush_work, ic->bitmap_flush_interval);
2487}
2488
2489static void bitmap_flush_work(struct work_struct *work)
2490{
2491 struct dm_integrity_c *ic = container_of(work, struct dm_integrity_c, bitmap_flush_work.work);
2492 struct dm_integrity_range range;
2493 unsigned long limit;
2494
2495 dm_integrity_flush_buffers(ic);
2496
2497 range.logical_sector = 0;
2498 range.n_sectors = ic->provided_data_sectors;
2499
2500 spin_lock_irq(&ic->endio_wait.lock);
2501 add_new_range_and_wait(ic, &range);
2502 spin_unlock_irq(&ic->endio_wait.lock);
2503
2504 dm_integrity_flush_buffers(ic);
2505 if (ic->meta_dev)
2506 blkdev_issue_flush(ic->dev->bdev, GFP_NOIO, NULL);
2507
2508 limit = ic->provided_data_sectors;
2509 if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) {
2510 limit = le64_to_cpu(ic->sb->recalc_sector)
2511 >> (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit)
2512 << (ic->sb->log2_sectors_per_block + ic->log2_blocks_per_bitmap_bit);
2513 }
2514 DEBUG_print("zeroing journal\n");
2515 block_bitmap_op(ic, ic->journal, 0, limit, BITMAP_OP_CLEAR);
2516 block_bitmap_op(ic, ic->may_write_bitmap, 0, limit, BITMAP_OP_CLEAR);
2517
2518 rw_journal_sectors(ic, REQ_OP_WRITE, REQ_FUA | REQ_SYNC, 0, ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL);
2519
2520 remove_range(ic, &range);
2521}
2522
2523
2220static void init_journal(struct dm_integrity_c *ic, unsigned start_section, 2524static void init_journal(struct dm_integrity_c *ic, unsigned start_section,
2221 unsigned n_sections, unsigned char commit_seq) 2525 unsigned n_sections, unsigned char commit_seq)
2222{ 2526{
@@ -2416,6 +2720,7 @@ clear_journal:
2416static void dm_integrity_postsuspend(struct dm_target *ti) 2720static void dm_integrity_postsuspend(struct dm_target *ti)
2417{ 2721{
2418 struct dm_integrity_c *ic = (struct dm_integrity_c *)ti->private; 2722 struct dm_integrity_c *ic = (struct dm_integrity_c *)ti->private;
2723 int r;
2419 2724
2420 del_timer_sync(&ic->autocommit_timer); 2725 del_timer_sync(&ic->autocommit_timer);
2421 2726
@@ -2424,6 +2729,9 @@ static void dm_integrity_postsuspend(struct dm_target *ti)
2424 if (ic->recalc_wq) 2729 if (ic->recalc_wq)
2425 drain_workqueue(ic->recalc_wq); 2730 drain_workqueue(ic->recalc_wq);
2426 2731
2732 if (ic->mode == 'B')
2733 cancel_delayed_work_sync(&ic->bitmap_flush_work);
2734
2427 queue_work(ic->commit_wq, &ic->commit_work); 2735 queue_work(ic->commit_wq, &ic->commit_work);
2428 drain_workqueue(ic->commit_wq); 2736 drain_workqueue(ic->commit_wq);
2429 2737
@@ -2434,6 +2742,17 @@ static void dm_integrity_postsuspend(struct dm_target *ti)
2434 dm_integrity_flush_buffers(ic); 2742 dm_integrity_flush_buffers(ic);
2435 } 2743 }
2436 2744
2745 if (ic->mode == 'B') {
2746 dm_integrity_flush_buffers(ic);
2747#if 1
2748 init_journal(ic, 0, ic->journal_sections, 0);
2749 ic->sb->flags &= ~cpu_to_le32(SB_FLAG_DIRTY_BITMAP);
2750 r = sync_rw_sb(ic, REQ_OP_WRITE, REQ_FUA);
2751 if (unlikely(r))
2752 dm_integrity_io_error(ic, "writing superblock", r);
2753#endif
2754 }
2755
2437 WRITE_ONCE(ic->suspending, 0); 2756 WRITE_ONCE(ic->suspending, 0);
2438 2757
2439 BUG_ON(!RB_EMPTY_ROOT(&ic->in_progress)); 2758 BUG_ON(!RB_EMPTY_ROOT(&ic->in_progress));
@@ -2444,11 +2763,65 @@ static void dm_integrity_postsuspend(struct dm_target *ti)
2444static void dm_integrity_resume(struct dm_target *ti) 2763static void dm_integrity_resume(struct dm_target *ti)
2445{ 2764{
2446 struct dm_integrity_c *ic = (struct dm_integrity_c *)ti->private; 2765 struct dm_integrity_c *ic = (struct dm_integrity_c *)ti->private;
2766 int r;
2767 DEBUG_print("resume\n");
2768
2769 if (ic->sb->flags & cpu_to_le32(SB_FLAG_DIRTY_BITMAP)) {
2770 DEBUG_print("resume dirty_bitmap\n");
2771 rw_journal_sectors(ic, REQ_OP_READ, 0, 0, ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL);
2772 if (ic->mode == 'B') {
2773 if (ic->sb->log2_blocks_per_bitmap_bit == ic->log2_blocks_per_bitmap_bit) {
2774 block_bitmap_copy(ic, ic->recalc_bitmap, ic->journal);
2775 block_bitmap_copy(ic, ic->may_write_bitmap, ic->journal);
2776 if (!block_bitmap_op(ic, ic->journal, 0, ic->provided_data_sectors, BITMAP_OP_TEST_ALL_CLEAR)) {
2777 ic->sb->flags |= cpu_to_le32(SB_FLAG_RECALCULATING);
2778 ic->sb->recalc_sector = cpu_to_le64(0);
2779 }
2780 } else {
2781 DEBUG_print("non-matching blocks_per_bitmap_bit: %u, %u\n", ic->sb->log2_blocks_per_bitmap_bit, ic->log2_blocks_per_bitmap_bit);
2782 ic->sb->log2_blocks_per_bitmap_bit = ic->log2_blocks_per_bitmap_bit;
2783 block_bitmap_op(ic, ic->recalc_bitmap, 0, ic->provided_data_sectors, BITMAP_OP_SET);
2784 block_bitmap_op(ic, ic->may_write_bitmap, 0, ic->provided_data_sectors, BITMAP_OP_SET);
2785 block_bitmap_op(ic, ic->journal, 0, ic->provided_data_sectors, BITMAP_OP_SET);
2786 rw_journal_sectors(ic, REQ_OP_WRITE, REQ_FUA | REQ_SYNC, 0, ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL);
2787 ic->sb->flags |= cpu_to_le32(SB_FLAG_RECALCULATING);
2788 ic->sb->recalc_sector = cpu_to_le64(0);
2789 }
2790 } else {
2791 if (!(ic->sb->log2_blocks_per_bitmap_bit == ic->log2_blocks_per_bitmap_bit &&
2792 block_bitmap_op(ic, ic->journal, 0, ic->provided_data_sectors, BITMAP_OP_TEST_ALL_CLEAR))) {
2793 ic->sb->flags |= cpu_to_le32(SB_FLAG_RECALCULATING);
2794 ic->sb->recalc_sector = cpu_to_le64(0);
2795 }
2796 init_journal(ic, 0, ic->journal_sections, 0);
2797 replay_journal(ic);
2798 ic->sb->flags &= ~cpu_to_le32(SB_FLAG_DIRTY_BITMAP);
2799 }
2800 r = sync_rw_sb(ic, REQ_OP_WRITE, REQ_FUA);
2801 if (unlikely(r))
2802 dm_integrity_io_error(ic, "writing superblock", r);
2803 } else {
2804 replay_journal(ic);
2805 if (ic->mode == 'B') {
2806 int mode;
2807 ic->sb->flags |= cpu_to_le32(SB_FLAG_DIRTY_BITMAP);
2808 ic->sb->log2_blocks_per_bitmap_bit = ic->log2_blocks_per_bitmap_bit;
2809 r = sync_rw_sb(ic, REQ_OP_WRITE, REQ_FUA);
2810 if (unlikely(r))
2811 dm_integrity_io_error(ic, "writing superblock", r);
2812
2813 mode = ic->recalculate_flag ? BITMAP_OP_SET : BITMAP_OP_CLEAR;
2814 block_bitmap_op(ic, ic->journal, 0, ic->provided_data_sectors, mode);
2815 block_bitmap_op(ic, ic->recalc_bitmap, 0, ic->provided_data_sectors, mode);
2816 block_bitmap_op(ic, ic->may_write_bitmap, 0, ic->provided_data_sectors, mode);
2817 rw_journal_sectors(ic, REQ_OP_WRITE, REQ_FUA | REQ_SYNC, 0, ic->n_bitmap_blocks * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT), NULL);
2818 }
2819 }
2447 2820
2448 replay_journal(ic); 2821 DEBUG_print("testing recalc: %x\n", ic->sb->flags);
2449 2822 if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) {
2450 if (ic->recalc_wq && ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) {
2451 __u64 recalc_pos = le64_to_cpu(ic->sb->recalc_sector); 2823 __u64 recalc_pos = le64_to_cpu(ic->sb->recalc_sector);
2824 DEBUG_print("recalc pos: %lx / %lx\n", (long)recalc_pos, ic->provided_data_sectors);
2452 if (recalc_pos < ic->provided_data_sectors) { 2825 if (recalc_pos < ic->provided_data_sectors) {
2453 queue_work(ic->recalc_wq, &ic->recalc_work); 2826 queue_work(ic->recalc_wq, &ic->recalc_work);
2454 } else if (recalc_pos > ic->provided_data_sectors) { 2827 } else if (recalc_pos > ic->provided_data_sectors) {
@@ -2486,6 +2859,8 @@ static void dm_integrity_status(struct dm_target *ti, status_type_t type,
2486 arg_count += !!(ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)); 2859 arg_count += !!(ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING));
2487 arg_count += ic->mode == 'J'; 2860 arg_count += ic->mode == 'J';
2488 arg_count += ic->mode == 'J'; 2861 arg_count += ic->mode == 'J';
2862 arg_count += ic->mode == 'B';
2863 arg_count += ic->mode == 'B';
2489 arg_count += !!ic->internal_hash_alg.alg_string; 2864 arg_count += !!ic->internal_hash_alg.alg_string;
2490 arg_count += !!ic->journal_crypt_alg.alg_string; 2865 arg_count += !!ic->journal_crypt_alg.alg_string;
2491 arg_count += !!ic->journal_mac_alg.alg_string; 2866 arg_count += !!ic->journal_mac_alg.alg_string;
@@ -2495,7 +2870,7 @@ static void dm_integrity_status(struct dm_target *ti, status_type_t type,
2495 DMEMIT(" meta_device:%s", ic->meta_dev->name); 2870 DMEMIT(" meta_device:%s", ic->meta_dev->name);
2496 if (ic->sectors_per_block != 1) 2871 if (ic->sectors_per_block != 1)
2497 DMEMIT(" block_size:%u", ic->sectors_per_block << SECTOR_SHIFT); 2872 DMEMIT(" block_size:%u", ic->sectors_per_block << SECTOR_SHIFT);
2498 if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) 2873 if (ic->recalculate_flag)
2499 DMEMIT(" recalculate"); 2874 DMEMIT(" recalculate");
2500 DMEMIT(" journal_sectors:%u", ic->initial_sectors - SB_SECTORS); 2875 DMEMIT(" journal_sectors:%u", ic->initial_sectors - SB_SECTORS);
2501 DMEMIT(" interleave_sectors:%u", 1U << ic->sb->log2_interleave_sectors); 2876 DMEMIT(" interleave_sectors:%u", 1U << ic->sb->log2_interleave_sectors);
@@ -2504,6 +2879,10 @@ static void dm_integrity_status(struct dm_target *ti, status_type_t type,
2504 DMEMIT(" journal_watermark:%u", (unsigned)watermark_percentage); 2879 DMEMIT(" journal_watermark:%u", (unsigned)watermark_percentage);
2505 DMEMIT(" commit_time:%u", ic->autocommit_msec); 2880 DMEMIT(" commit_time:%u", ic->autocommit_msec);
2506 } 2881 }
2882 if (ic->mode == 'B') {
2883 DMEMIT(" sectors_per_bit:%llu", (unsigned long long)ic->sectors_per_block << ic->log2_blocks_per_bitmap_bit);
2884 DMEMIT(" bitmap_flush_interval:%u", jiffies_to_msecs(ic->bitmap_flush_interval));
2885 }
2507 2886
2508#define EMIT_ALG(a, n) \ 2887#define EMIT_ALG(a, n) \
2509 do { \ 2888 do { \
@@ -3085,7 +3464,7 @@ bad:
3085 * device 3464 * device
3086 * offset from the start of the device 3465 * offset from the start of the device
3087 * tag size 3466 * tag size
3088 * D - direct writes, J - journal writes, R - recovery mode 3467 * D - direct writes, J - journal writes, B - bitmap mode, R - recovery mode
3089 * number of optional arguments 3468 * number of optional arguments
3090 * optional arguments: 3469 * optional arguments:
3091 * journal_sectors 3470 * journal_sectors
@@ -3095,6 +3474,8 @@ bad:
3095 * commit_time 3474 * commit_time
3096 * meta_device 3475 * meta_device
3097 * block_size 3476 * block_size
3477 * sectors_per_bit
3478 * bitmap_flush_interval
3098 * internal_hash 3479 * internal_hash
3099 * journal_crypt 3480 * journal_crypt
3100 * journal_mac 3481 * journal_mac
@@ -3111,10 +3492,13 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
3111 {0, 9, "Invalid number of feature args"}, 3492 {0, 9, "Invalid number of feature args"},
3112 }; 3493 };
3113 unsigned journal_sectors, interleave_sectors, buffer_sectors, journal_watermark, sync_msec; 3494 unsigned journal_sectors, interleave_sectors, buffer_sectors, journal_watermark, sync_msec;
3114 bool recalculate;
3115 bool should_write_sb; 3495 bool should_write_sb;
3116 __u64 threshold; 3496 __u64 threshold;
3117 unsigned long long start; 3497 unsigned long long start;
3498 __s8 log2_sectors_per_bitmap_bit = -1;
3499 __s8 log2_blocks_per_bitmap_bit;
3500 __u64 bits_in_journal;
3501 __u64 n_bitmap_bits;
3118 3502
3119#define DIRECT_ARGUMENTS 4 3503#define DIRECT_ARGUMENTS 4
3120 3504
@@ -3138,6 +3522,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
3138 init_waitqueue_head(&ic->copy_to_journal_wait); 3522 init_waitqueue_head(&ic->copy_to_journal_wait);
3139 init_completion(&ic->crypto_backoff); 3523 init_completion(&ic->crypto_backoff);
3140 atomic64_set(&ic->number_of_mismatches, 0); 3524 atomic64_set(&ic->number_of_mismatches, 0);
3525 ic->bitmap_flush_interval = BITMAP_FLUSH_INTERVAL;
3141 3526
3142 r = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &ic->dev); 3527 r = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &ic->dev);
3143 if (r) { 3528 if (r) {
@@ -3160,10 +3545,10 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
3160 } 3545 }
3161 } 3546 }
3162 3547
3163 if (!strcmp(argv[3], "J") || !strcmp(argv[3], "D") || !strcmp(argv[3], "R")) 3548 if (!strcmp(argv[3], "J") || !strcmp(argv[3], "B") || !strcmp(argv[3], "D") || !strcmp(argv[3], "R")) {
3164 ic->mode = argv[3][0]; 3549 ic->mode = argv[3][0];
3165 else { 3550 } else {
3166 ti->error = "Invalid mode (expecting J, D, R)"; 3551 ti->error = "Invalid mode (expecting J, B, D, R)";
3167 r = -EINVAL; 3552 r = -EINVAL;
3168 goto bad; 3553 goto bad;
3169 } 3554 }
@@ -3173,7 +3558,6 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
3173 buffer_sectors = DEFAULT_BUFFER_SECTORS; 3558 buffer_sectors = DEFAULT_BUFFER_SECTORS;
3174 journal_watermark = DEFAULT_JOURNAL_WATERMARK; 3559 journal_watermark = DEFAULT_JOURNAL_WATERMARK;
3175 sync_msec = DEFAULT_SYNC_MSEC; 3560 sync_msec = DEFAULT_SYNC_MSEC;
3176 recalculate = false;
3177 ic->sectors_per_block = 1; 3561 ic->sectors_per_block = 1;
3178 3562
3179 as.argc = argc - DIRECT_ARGUMENTS; 3563 as.argc = argc - DIRECT_ARGUMENTS;
@@ -3185,6 +3569,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
3185 while (extra_args--) { 3569 while (extra_args--) {
3186 const char *opt_string; 3570 const char *opt_string;
3187 unsigned val; 3571 unsigned val;
3572 unsigned long long llval;
3188 opt_string = dm_shift_arg(&as); 3573 opt_string = dm_shift_arg(&as);
3189 if (!opt_string) { 3574 if (!opt_string) {
3190 r = -EINVAL; 3575 r = -EINVAL;
@@ -3220,6 +3605,14 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
3220 goto bad; 3605 goto bad;
3221 } 3606 }
3222 ic->sectors_per_block = val >> SECTOR_SHIFT; 3607 ic->sectors_per_block = val >> SECTOR_SHIFT;
3608 } else if (sscanf(opt_string, "sectors_per_bit:%llu%c", &llval, &dummy) == 1) {
3609 log2_sectors_per_bitmap_bit = !llval ? 0 : __ilog2_u64(llval);
3610 } else if (sscanf(opt_string, "bitmap_flush_interval:%u%c", &val, &dummy) == 1) {
3611 if (val >= (uint64_t)UINT_MAX * 1000 / HZ) {
3612 r = -EINVAL;
3613 ti->error = "Invalid bitmap_flush_interval argument";
3614 }
3615 ic->bitmap_flush_interval = msecs_to_jiffies(val);
3223 } else if (!strncmp(opt_string, "internal_hash:", strlen("internal_hash:"))) { 3616 } else if (!strncmp(opt_string, "internal_hash:", strlen("internal_hash:"))) {
3224 r = get_alg_and_key(opt_string, &ic->internal_hash_alg, &ti->error, 3617 r = get_alg_and_key(opt_string, &ic->internal_hash_alg, &ti->error,
3225 "Invalid internal_hash argument"); 3618 "Invalid internal_hash argument");
@@ -3236,7 +3629,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
3236 if (r) 3629 if (r)
3237 goto bad; 3630 goto bad;
3238 } else if (!strcmp(opt_string, "recalculate")) { 3631 } else if (!strcmp(opt_string, "recalculate")) {
3239 recalculate = true; 3632 ic->recalculate_flag = true;
3240 } else { 3633 } else {
3241 r = -EINVAL; 3634 r = -EINVAL;
3242 ti->error = "Invalid argument"; 3635 ti->error = "Invalid argument";
@@ -3287,6 +3680,12 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
3287 else 3680 else
3288 ic->log2_tag_size = -1; 3681 ic->log2_tag_size = -1;
3289 3682
3683 if (ic->mode == 'B' && !ic->internal_hash) {
3684 r = -EINVAL;
3685 ti->error = "Bitmap mode can be only used with internal hash";
3686 goto bad;
3687 }
3688
3290 ic->autocommit_jiffies = msecs_to_jiffies(sync_msec); 3689 ic->autocommit_jiffies = msecs_to_jiffies(sync_msec);
3291 ic->autocommit_msec = sync_msec; 3690 ic->autocommit_msec = sync_msec;
3292 timer_setup(&ic->autocommit_timer, autocommit_fn, 0); 3691 timer_setup(&ic->autocommit_timer, autocommit_fn, 0);
@@ -3332,7 +3731,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
3332 } 3731 }
3333 INIT_WORK(&ic->commit_work, integrity_commit); 3732 INIT_WORK(&ic->commit_work, integrity_commit);
3334 3733
3335 if (ic->mode == 'J') { 3734 if (ic->mode == 'J' || ic->mode == 'B') {
3336 ic->writer_wq = alloc_workqueue("dm-integrity-writer", WQ_MEM_RECLAIM, 1); 3735 ic->writer_wq = alloc_workqueue("dm-integrity-writer", WQ_MEM_RECLAIM, 1);
3337 if (!ic->writer_wq) { 3736 if (!ic->writer_wq) {
3338 ti->error = "Cannot allocate workqueue"; 3737 ti->error = "Cannot allocate workqueue";
@@ -3373,7 +3772,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
3373 should_write_sb = true; 3772 should_write_sb = true;
3374 } 3773 }
3375 3774
3376 if (!ic->sb->version || ic->sb->version > SB_VERSION_2) { 3775 if (!ic->sb->version || ic->sb->version > SB_VERSION_3) {
3377 r = -EINVAL; 3776 r = -EINVAL;
3378 ti->error = "Unknown version"; 3777 ti->error = "Unknown version";
3379 goto bad; 3778 goto bad;
@@ -3433,6 +3832,27 @@ try_smaller_buffer:
3433 ti->error = "The device is too small"; 3832 ti->error = "The device is too small";
3434 goto bad; 3833 goto bad;
3435 } 3834 }
3835
3836 if (log2_sectors_per_bitmap_bit < 0)
3837 log2_sectors_per_bitmap_bit = __fls(DEFAULT_SECTORS_PER_BITMAP_BIT);
3838 if (log2_sectors_per_bitmap_bit < ic->sb->log2_sectors_per_block)
3839 log2_sectors_per_bitmap_bit = ic->sb->log2_sectors_per_block;
3840
3841 bits_in_journal = ((__u64)ic->journal_section_sectors * ic->journal_sections) << (SECTOR_SHIFT + 3);
3842 if (bits_in_journal > UINT_MAX)
3843 bits_in_journal = UINT_MAX;
3844 while (bits_in_journal < (ic->provided_data_sectors + ((sector_t)1 << log2_sectors_per_bitmap_bit) - 1) >> log2_sectors_per_bitmap_bit)
3845 log2_sectors_per_bitmap_bit++;
3846
3847 log2_blocks_per_bitmap_bit = log2_sectors_per_bitmap_bit - ic->sb->log2_sectors_per_block;
3848 ic->log2_blocks_per_bitmap_bit = log2_blocks_per_bitmap_bit;
3849 if (should_write_sb) {
3850 ic->sb->log2_blocks_per_bitmap_bit = log2_blocks_per_bitmap_bit;
3851 }
3852 n_bitmap_bits = ((ic->provided_data_sectors >> ic->sb->log2_sectors_per_block)
3853 + (((sector_t)1 << log2_blocks_per_bitmap_bit) - 1)) >> log2_blocks_per_bitmap_bit;
3854 ic->n_bitmap_blocks = DIV_ROUND_UP(n_bitmap_bits, BITMAP_BLOCK_SIZE * 8);
3855
3436 if (!ic->meta_dev) 3856 if (!ic->meta_dev)
3437 ic->log2_buffer_sectors = min(ic->log2_buffer_sectors, (__u8)__ffs(ic->metadata_run)); 3857 ic->log2_buffer_sectors = min(ic->log2_buffer_sectors, (__u8)__ffs(ic->metadata_run));
3438 3858
@@ -3457,25 +3877,21 @@ try_smaller_buffer:
3457 DEBUG_print(" journal_sections %u\n", (unsigned)le32_to_cpu(ic->sb->journal_sections)); 3877 DEBUG_print(" journal_sections %u\n", (unsigned)le32_to_cpu(ic->sb->journal_sections));
3458 DEBUG_print(" journal_entries %u\n", ic->journal_entries); 3878 DEBUG_print(" journal_entries %u\n", ic->journal_entries);
3459 DEBUG_print(" log2_interleave_sectors %d\n", ic->sb->log2_interleave_sectors); 3879 DEBUG_print(" log2_interleave_sectors %d\n", ic->sb->log2_interleave_sectors);
3460 DEBUG_print(" data_device_sectors 0x%llx\n", (unsigned long long)ic->data_device_sectors); 3880 DEBUG_print(" data_device_sectors 0x%llx\n", i_size_read(ic->dev->bdev->bd_inode) >> SECTOR_SHIFT);
3461 DEBUG_print(" initial_sectors 0x%x\n", ic->initial_sectors); 3881 DEBUG_print(" initial_sectors 0x%x\n", ic->initial_sectors);
3462 DEBUG_print(" metadata_run 0x%x\n", ic->metadata_run); 3882 DEBUG_print(" metadata_run 0x%x\n", ic->metadata_run);
3463 DEBUG_print(" log2_metadata_run %d\n", ic->log2_metadata_run); 3883 DEBUG_print(" log2_metadata_run %d\n", ic->log2_metadata_run);
3464 DEBUG_print(" provided_data_sectors 0x%llx (%llu)\n", (unsigned long long)ic->provided_data_sectors, 3884 DEBUG_print(" provided_data_sectors 0x%llx (%llu)\n", (unsigned long long)ic->provided_data_sectors,
3465 (unsigned long long)ic->provided_data_sectors); 3885 (unsigned long long)ic->provided_data_sectors);
3466 DEBUG_print(" log2_buffer_sectors %u\n", ic->log2_buffer_sectors); 3886 DEBUG_print(" log2_buffer_sectors %u\n", ic->log2_buffer_sectors);
3887 DEBUG_print(" bits_in_journal %llu\n", (unsigned long long)bits_in_journal);
3467 3888
3468 if (recalculate && !(ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING))) { 3889 if (ic->recalculate_flag && !(ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING))) {
3469 ic->sb->flags |= cpu_to_le32(SB_FLAG_RECALCULATING); 3890 ic->sb->flags |= cpu_to_le32(SB_FLAG_RECALCULATING);
3470 ic->sb->recalc_sector = cpu_to_le64(0); 3891 ic->sb->recalc_sector = cpu_to_le64(0);
3471 } 3892 }
3472 3893
3473 if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) { 3894 if (ic->internal_hash) {
3474 if (!ic->internal_hash) {
3475 r = -EINVAL;
3476 ti->error = "Recalculate is only valid with internal hash";
3477 goto bad;
3478 }
3479 ic->recalc_wq = alloc_workqueue("dm-integrity-recalc", WQ_MEM_RECLAIM, 1); 3895 ic->recalc_wq = alloc_workqueue("dm-integrity-recalc", WQ_MEM_RECLAIM, 1);
3480 if (!ic->recalc_wq ) { 3896 if (!ic->recalc_wq ) {
3481 ti->error = "Cannot allocate workqueue"; 3897 ti->error = "Cannot allocate workqueue";
@@ -3512,6 +3928,45 @@ try_smaller_buffer:
3512 r = create_journal(ic, &ti->error); 3928 r = create_journal(ic, &ti->error);
3513 if (r) 3929 if (r)
3514 goto bad; 3930 goto bad;
3931
3932 }
3933
3934 if (ic->mode == 'B') {
3935 unsigned i;
3936 unsigned n_bitmap_pages = DIV_ROUND_UP(ic->n_bitmap_blocks, PAGE_SIZE / BITMAP_BLOCK_SIZE);
3937
3938 ic->recalc_bitmap = dm_integrity_alloc_page_list(n_bitmap_pages);
3939 if (!ic->recalc_bitmap) {
3940 r = -ENOMEM;
3941 goto bad;
3942 }
3943 ic->may_write_bitmap = dm_integrity_alloc_page_list(n_bitmap_pages);
3944 if (!ic->may_write_bitmap) {
3945 r = -ENOMEM;
3946 goto bad;
3947 }
3948 ic->bbs = kvmalloc_array(ic->n_bitmap_blocks, sizeof(struct bitmap_block_status), GFP_KERNEL);
3949 if (!ic->bbs) {
3950 r = -ENOMEM;
3951 goto bad;
3952 }
3953 INIT_DELAYED_WORK(&ic->bitmap_flush_work, bitmap_flush_work);
3954 for (i = 0; i < ic->n_bitmap_blocks; i++) {
3955 struct bitmap_block_status *bbs = &ic->bbs[i];
3956 unsigned sector, pl_index, pl_offset;
3957
3958 INIT_WORK(&bbs->work, bitmap_block_work);
3959 bbs->ic = ic;
3960 bbs->idx = i;
3961 bio_list_init(&bbs->bio_queue);
3962 spin_lock_init(&bbs->bio_queue_lock);
3963
3964 sector = i * (BITMAP_BLOCK_SIZE >> SECTOR_SHIFT);
3965 pl_index = sector >> (PAGE_SHIFT - SECTOR_SHIFT);
3966 pl_offset = (sector << SECTOR_SHIFT) & (PAGE_SIZE - 1);
3967
3968 bbs->bitmap = lowmem_page_address(ic->journal[pl_index].page) + pl_offset;
3969 }
3515 } 3970 }
3516 3971
3517 if (should_write_sb) { 3972 if (should_write_sb) {
@@ -3536,6 +3991,17 @@ try_smaller_buffer:
3536 if (r) 3991 if (r)
3537 goto bad; 3992 goto bad;
3538 } 3993 }
3994 if (ic->mode == 'B') {
3995 unsigned max_io_len = ((sector_t)ic->sectors_per_block << ic->log2_blocks_per_bitmap_bit) * (BITMAP_BLOCK_SIZE * 8);
3996 if (!max_io_len)
3997 max_io_len = 1U << 31;
3998 DEBUG_print("max_io_len: old %u, new %u\n", ti->max_io_len, max_io_len);
3999 if (!ti->max_io_len || ti->max_io_len > max_io_len) {
4000 r = dm_set_target_max_io_len(ti, max_io_len);
4001 if (r)
4002 goto bad;
4003 }
4004 }
3539 4005
3540 if (!ic->internal_hash) 4006 if (!ic->internal_hash)
3541 dm_integrity_set(ti, ic); 4007 dm_integrity_set(ti, ic);
@@ -3544,6 +4010,7 @@ try_smaller_buffer:
3544 ti->flush_supported = true; 4010 ti->flush_supported = true;
3545 4011
3546 return 0; 4012 return 0;
4013
3547bad: 4014bad:
3548 dm_integrity_dtr(ti); 4015 dm_integrity_dtr(ti);
3549 return r; 4016 return r;
@@ -3568,6 +4035,7 @@ static void dm_integrity_dtr(struct dm_target *ti)
3568 destroy_workqueue(ic->recalc_wq); 4035 destroy_workqueue(ic->recalc_wq);
3569 vfree(ic->recalc_buffer); 4036 vfree(ic->recalc_buffer);
3570 kvfree(ic->recalc_tags); 4037 kvfree(ic->recalc_tags);
4038 kvfree(ic->bbs);
3571 if (ic->bufio) 4039 if (ic->bufio)
3572 dm_bufio_client_destroy(ic->bufio); 4040 dm_bufio_client_destroy(ic->bufio);
3573 mempool_exit(&ic->journal_io_mempool); 4041 mempool_exit(&ic->journal_io_mempool);
@@ -3580,6 +4048,8 @@ static void dm_integrity_dtr(struct dm_target *ti)
3580 dm_integrity_free_page_list(ic->journal); 4048 dm_integrity_free_page_list(ic->journal);
3581 dm_integrity_free_page_list(ic->journal_io); 4049 dm_integrity_free_page_list(ic->journal_io);
3582 dm_integrity_free_page_list(ic->journal_xor); 4050 dm_integrity_free_page_list(ic->journal_xor);
4051 dm_integrity_free_page_list(ic->recalc_bitmap);
4052 dm_integrity_free_page_list(ic->may_write_bitmap);
3583 if (ic->journal_scatterlist) 4053 if (ic->journal_scatterlist)
3584 dm_integrity_free_journal_scatterlist(ic, ic->journal_scatterlist); 4054 dm_integrity_free_journal_scatterlist(ic, ic->journal_scatterlist);
3585 if (ic->journal_io_scatterlist) 4055 if (ic->journal_io_scatterlist)
@@ -3617,7 +4087,7 @@ static void dm_integrity_dtr(struct dm_target *ti)
3617 4087
3618static struct target_type integrity_target = { 4088static struct target_type integrity_target = {
3619 .name = "integrity", 4089 .name = "integrity",
3620 .version = {1, 2, 0}, 4090 .version = {1, 3, 0},
3621 .module = THIS_MODULE, 4091 .module = THIS_MODULE,
3622 .features = DM_TARGET_SINGLETON | DM_TARGET_INTEGRITY, 4092 .features = DM_TARGET_SINGLETON | DM_TARGET_INTEGRITY,
3623 .ctr = dm_integrity_ctr, 4093 .ctr = dm_integrity_ctr,