diff options
author | Kent Overstreet <koverstreet@google.com> | 2013-06-05 09:21:07 -0400 |
---|---|---|
committer | Kent Overstreet <koverstreet@google.com> | 2013-06-27 00:57:23 -0400 |
commit | 279afbad4e54acbd61bf88a54a73af3bbfdeb5dd (patch) | |
tree | aefebaa3a7f54049904a275ca3035301603a9be1 | |
parent | 444fc0b6b167ed164e7436621a9d095e042644dd (diff) |
bcache: Track dirty data by stripe
To make background writeback aware of RAID5/6 stripes, we first need to
track the amount of dirty data within each stripe. We do this by
breaking up the existing sectors_dirty counter into per-stripe atomic_t counters.
Signed-off-by: Kent Overstreet <koverstreet@google.com>
-rw-r--r-- | drivers/md/bcache/bcache.h | 10 | ||||
-rw-r--r-- | drivers/md/bcache/btree.c | 20 | ||||
-rw-r--r-- | drivers/md/bcache/request.c | 3 | ||||
-rw-r--r-- | drivers/md/bcache/super.c | 32 | ||||
-rw-r--r-- | drivers/md/bcache/sysfs.c | 5 | ||||
-rw-r--r-- | drivers/md/bcache/writeback.c | 40 | ||||
-rw-r--r-- | drivers/md/bcache/writeback.h | 21 |
7 files changed, 105 insertions, 26 deletions
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index d099d8894c2f..dbddef0cdb59 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h | |||
@@ -437,7 +437,10 @@ struct bcache_device { | |||
437 | /* If nonzero, we're detaching/unregistering from cache set */ | 437 | /* If nonzero, we're detaching/unregistering from cache set */ |
438 | atomic_t detaching; | 438 | atomic_t detaching; |
439 | 439 | ||
440 | atomic_long_t sectors_dirty; | 440 | uint64_t nr_stripes; |
441 | unsigned stripe_size_bits; | ||
442 | atomic_t *stripe_sectors_dirty; | ||
443 | |||
441 | unsigned long sectors_dirty_last; | 444 | unsigned long sectors_dirty_last; |
442 | long sectors_dirty_derivative; | 445 | long sectors_dirty_derivative; |
443 | 446 | ||
@@ -1159,9 +1162,6 @@ static inline void wake_up_allocators(struct cache_set *c) | |||
1159 | 1162 | ||
1160 | /* Forward declarations */ | 1163 | /* Forward declarations */ |
1161 | 1164 | ||
1162 | void bch_writeback_queue(struct cached_dev *); | ||
1163 | void bch_writeback_add(struct cached_dev *, unsigned); | ||
1164 | |||
1165 | void bch_count_io_errors(struct cache *, int, const char *); | 1165 | void bch_count_io_errors(struct cache *, int, const char *); |
1166 | void bch_bbio_count_io_errors(struct cache_set *, struct bio *, | 1166 | void bch_bbio_count_io_errors(struct cache_set *, struct bio *, |
1167 | int, const char *); | 1167 | int, const char *); |
@@ -1224,8 +1224,6 @@ void bch_cache_set_stop(struct cache_set *); | |||
1224 | struct cache_set *bch_cache_set_alloc(struct cache_sb *); | 1224 | struct cache_set *bch_cache_set_alloc(struct cache_sb *); |
1225 | void bch_btree_cache_free(struct cache_set *); | 1225 | void bch_btree_cache_free(struct cache_set *); |
1226 | int bch_btree_cache_alloc(struct cache_set *); | 1226 | int bch_btree_cache_alloc(struct cache_set *); |
1227 | void bch_sectors_dirty_init(struct cached_dev *); | ||
1228 | void bch_cached_dev_writeback_init(struct cached_dev *); | ||
1229 | void bch_moving_init_cache_set(struct cache_set *); | 1227 | void bch_moving_init_cache_set(struct cache_set *); |
1230 | 1228 | ||
1231 | int bch_cache_allocator_start(struct cache *ca); | 1229 | int bch_cache_allocator_start(struct cache *ca); |
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 230c3a6d9be2..b93cf56260a4 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c | |||
@@ -24,6 +24,7 @@ | |||
24 | #include "btree.h" | 24 | #include "btree.h" |
25 | #include "debug.h" | 25 | #include "debug.h" |
26 | #include "request.h" | 26 | #include "request.h" |
27 | #include "writeback.h" | ||
27 | 28 | ||
28 | #include <linux/slab.h> | 29 | #include <linux/slab.h> |
29 | #include <linux/bitops.h> | 30 | #include <linux/bitops.h> |
@@ -1599,14 +1600,14 @@ static bool fix_overlapping_extents(struct btree *b, | |||
1599 | struct btree_iter *iter, | 1600 | struct btree_iter *iter, |
1600 | struct btree_op *op) | 1601 | struct btree_op *op) |
1601 | { | 1602 | { |
1602 | void subtract_dirty(struct bkey *k, int sectors) | 1603 | void subtract_dirty(struct bkey *k, uint64_t offset, int sectors) |
1603 | { | 1604 | { |
1604 | struct bcache_device *d = b->c->devices[KEY_INODE(k)]; | 1605 | if (KEY_DIRTY(k)) |
1605 | 1606 | bcache_dev_sectors_dirty_add(b->c, KEY_INODE(k), | |
1606 | if (KEY_DIRTY(k) && d) | 1607 | offset, -sectors); |
1607 | atomic_long_sub(sectors, &d->sectors_dirty); | ||
1608 | } | 1608 | } |
1609 | 1609 | ||
1610 | uint64_t old_offset; | ||
1610 | unsigned old_size, sectors_found = 0; | 1611 | unsigned old_size, sectors_found = 0; |
1611 | 1612 | ||
1612 | while (1) { | 1613 | while (1) { |
@@ -1618,6 +1619,7 @@ static bool fix_overlapping_extents(struct btree *b, | |||
1618 | if (bkey_cmp(k, &START_KEY(insert)) <= 0) | 1619 | if (bkey_cmp(k, &START_KEY(insert)) <= 0) |
1619 | continue; | 1620 | continue; |
1620 | 1621 | ||
1622 | old_offset = KEY_START(k); | ||
1621 | old_size = KEY_SIZE(k); | 1623 | old_size = KEY_SIZE(k); |
1622 | 1624 | ||
1623 | /* | 1625 | /* |
@@ -1673,7 +1675,7 @@ static bool fix_overlapping_extents(struct btree *b, | |||
1673 | 1675 | ||
1674 | struct bkey *top; | 1676 | struct bkey *top; |
1675 | 1677 | ||
1676 | subtract_dirty(k, KEY_SIZE(insert)); | 1678 | subtract_dirty(k, KEY_START(insert), KEY_SIZE(insert)); |
1677 | 1679 | ||
1678 | if (bkey_written(b, k)) { | 1680 | if (bkey_written(b, k)) { |
1679 | /* | 1681 | /* |
@@ -1720,7 +1722,7 @@ static bool fix_overlapping_extents(struct btree *b, | |||
1720 | } | 1722 | } |
1721 | } | 1723 | } |
1722 | 1724 | ||
1723 | subtract_dirty(k, old_size - KEY_SIZE(k)); | 1725 | subtract_dirty(k, old_offset, old_size - KEY_SIZE(k)); |
1724 | } | 1726 | } |
1725 | 1727 | ||
1726 | check_failed: | 1728 | check_failed: |
@@ -1796,6 +1798,10 @@ static bool btree_insert_key(struct btree *b, struct btree_op *op, | |||
1796 | insert: shift_keys(b, m, k); | 1798 | insert: shift_keys(b, m, k); |
1797 | copy: bkey_copy(m, k); | 1799 | copy: bkey_copy(m, k); |
1798 | merged: | 1800 | merged: |
1801 | if (KEY_DIRTY(k)) | ||
1802 | bcache_dev_sectors_dirty_add(b->c, KEY_INODE(k), | ||
1803 | KEY_START(k), KEY_SIZE(k)); | ||
1804 | |||
1799 | bch_check_keys(b, "%u for %s", status, op_type(op)); | 1805 | bch_check_keys(b, "%u for %s", status, op_type(op)); |
1800 | 1806 | ||
1801 | if (b->level && !KEY_OFFSET(k)) | 1807 | if (b->level && !KEY_OFFSET(k)) |
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 695469958c1e..017c95fced8e 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c | |||
@@ -10,6 +10,7 @@ | |||
10 | #include "btree.h" | 10 | #include "btree.h" |
11 | #include "debug.h" | 11 | #include "debug.h" |
12 | #include "request.h" | 12 | #include "request.h" |
13 | #include "writeback.h" | ||
13 | 14 | ||
14 | #include <linux/cgroup.h> | 15 | #include <linux/cgroup.h> |
15 | #include <linux/module.h> | 16 | #include <linux/module.h> |
@@ -1044,7 +1045,7 @@ static void request_write(struct cached_dev *dc, struct search *s) | |||
1044 | closure_bio_submit(bio, cl, s->d); | 1045 | closure_bio_submit(bio, cl, s->d); |
1045 | } else { | 1046 | } else { |
1046 | s->op.cache_bio = bio; | 1047 | s->op.cache_bio = bio; |
1047 | bch_writeback_add(dc, bio_sectors(bio)); | 1048 | bch_writeback_add(dc); |
1048 | } | 1049 | } |
1049 | out: | 1050 | out: |
1050 | closure_call(&s->op.cl, bch_insert_data, NULL, cl); | 1051 | closure_call(&s->op.cl, bch_insert_data, NULL, cl); |
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index dbfa1c38e85e..8c73f0c7f28a 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c | |||
@@ -10,6 +10,7 @@ | |||
10 | #include "btree.h" | 10 | #include "btree.h" |
11 | #include "debug.h" | 11 | #include "debug.h" |
12 | #include "request.h" | 12 | #include "request.h" |
13 | #include "writeback.h" | ||
13 | 14 | ||
14 | #include <linux/blkdev.h> | 15 | #include <linux/blkdev.h> |
15 | #include <linux/buffer_head.h> | 16 | #include <linux/buffer_head.h> |
@@ -744,13 +745,35 @@ static void bcache_device_free(struct bcache_device *d) | |||
744 | mempool_destroy(d->unaligned_bvec); | 745 | mempool_destroy(d->unaligned_bvec); |
745 | if (d->bio_split) | 746 | if (d->bio_split) |
746 | bioset_free(d->bio_split); | 747 | bioset_free(d->bio_split); |
748 | if (is_vmalloc_addr(d->stripe_sectors_dirty)) | ||
749 | vfree(d->stripe_sectors_dirty); | ||
750 | else | ||
751 | kfree(d->stripe_sectors_dirty); | ||
747 | 752 | ||
748 | closure_debug_destroy(&d->cl); | 753 | closure_debug_destroy(&d->cl); |
749 | } | 754 | } |
750 | 755 | ||
751 | static int bcache_device_init(struct bcache_device *d, unsigned block_size) | 756 | static int bcache_device_init(struct bcache_device *d, unsigned block_size, |
757 | sector_t sectors) | ||
752 | { | 758 | { |
753 | struct request_queue *q; | 759 | struct request_queue *q; |
760 | size_t n; | ||
761 | |||
762 | if (!d->stripe_size_bits) | ||
763 | d->stripe_size_bits = 31; | ||
764 | |||
765 | d->nr_stripes = round_up(sectors, 1 << d->stripe_size_bits) >> | ||
766 | d->stripe_size_bits; | ||
767 | |||
768 | if (!d->nr_stripes || d->nr_stripes > SIZE_MAX / sizeof(atomic_t)) | ||
769 | return -ENOMEM; | ||
770 | |||
771 | n = d->nr_stripes * sizeof(atomic_t); | ||
772 | d->stripe_sectors_dirty = n < PAGE_SIZE << 6 | ||
773 | ? kzalloc(n, GFP_KERNEL) | ||
774 | : vzalloc(n); | ||
775 | if (!d->stripe_sectors_dirty) | ||
776 | return -ENOMEM; | ||
754 | 777 | ||
755 | if (!(d->bio_split = bioset_create(4, offsetof(struct bbio, bio))) || | 778 | if (!(d->bio_split = bioset_create(4, offsetof(struct bbio, bio))) || |
756 | !(d->unaligned_bvec = mempool_create_kmalloc_pool(1, | 779 | !(d->unaligned_bvec = mempool_create_kmalloc_pool(1, |
@@ -760,6 +783,7 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size) | |||
760 | !(q = blk_alloc_queue(GFP_KERNEL))) | 783 | !(q = blk_alloc_queue(GFP_KERNEL))) |
761 | return -ENOMEM; | 784 | return -ENOMEM; |
762 | 785 | ||
786 | set_capacity(d->disk, sectors); | ||
763 | snprintf(d->disk->disk_name, DISK_NAME_LEN, "bcache%i", bcache_minor); | 787 | snprintf(d->disk->disk_name, DISK_NAME_LEN, "bcache%i", bcache_minor); |
764 | 788 | ||
765 | d->disk->major = bcache_major; | 789 | d->disk->major = bcache_major; |
@@ -1047,7 +1071,8 @@ static int cached_dev_init(struct cached_dev *dc, unsigned block_size) | |||
1047 | hlist_add_head(&io->hash, dc->io_hash + RECENT_IO); | 1071 | hlist_add_head(&io->hash, dc->io_hash + RECENT_IO); |
1048 | } | 1072 | } |
1049 | 1073 | ||
1050 | ret = bcache_device_init(&dc->disk, block_size); | 1074 | ret = bcache_device_init(&dc->disk, block_size, |
1075 | dc->bdev->bd_part->nr_sects - dc->sb.data_offset); | ||
1051 | if (ret) | 1076 | if (ret) |
1052 | return ret; | 1077 | return ret; |
1053 | 1078 | ||
@@ -1146,11 +1171,10 @@ static int flash_dev_run(struct cache_set *c, struct uuid_entry *u) | |||
1146 | 1171 | ||
1147 | kobject_init(&d->kobj, &bch_flash_dev_ktype); | 1172 | kobject_init(&d->kobj, &bch_flash_dev_ktype); |
1148 | 1173 | ||
1149 | if (bcache_device_init(d, block_bytes(c))) | 1174 | if (bcache_device_init(d, block_bytes(c), u->sectors)) |
1150 | goto err; | 1175 | goto err; |
1151 | 1176 | ||
1152 | bcache_device_attach(d, c, u - c->uuids); | 1177 | bcache_device_attach(d, c, u - c->uuids); |
1153 | set_capacity(d->disk, u->sectors); | ||
1154 | bch_flash_dev_request_init(d); | 1178 | bch_flash_dev_request_init(d); |
1155 | add_disk(d->disk); | 1179 | add_disk(d->disk); |
1156 | 1180 | ||
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index f5c2d8695230..cf8d91ec3238 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c | |||
@@ -9,6 +9,7 @@ | |||
9 | #include "sysfs.h" | 9 | #include "sysfs.h" |
10 | #include "btree.h" | 10 | #include "btree.h" |
11 | #include "request.h" | 11 | #include "request.h" |
12 | #include "writeback.h" | ||
12 | 13 | ||
13 | #include <linux/blkdev.h> | 14 | #include <linux/blkdev.h> |
14 | #include <linux/sort.h> | 15 | #include <linux/sort.h> |
@@ -128,7 +129,7 @@ SHOW(__bch_cached_dev) | |||
128 | char derivative[20]; | 129 | char derivative[20]; |
129 | char target[20]; | 130 | char target[20]; |
130 | bch_hprint(dirty, | 131 | bch_hprint(dirty, |
131 | atomic_long_read(&dc->disk.sectors_dirty) << 9); | 132 | bcache_dev_sectors_dirty(&dc->disk) << 9); |
132 | bch_hprint(derivative, dc->writeback_rate_derivative << 9); | 133 | bch_hprint(derivative, dc->writeback_rate_derivative << 9); |
133 | bch_hprint(target, dc->writeback_rate_target << 9); | 134 | bch_hprint(target, dc->writeback_rate_target << 9); |
134 | 135 | ||
@@ -144,7 +145,7 @@ SHOW(__bch_cached_dev) | |||
144 | } | 145 | } |
145 | 146 | ||
146 | sysfs_hprint(dirty_data, | 147 | sysfs_hprint(dirty_data, |
147 | atomic_long_read(&dc->disk.sectors_dirty) << 9); | 148 | bcache_dev_sectors_dirty(&dc->disk) << 9); |
148 | 149 | ||
149 | var_printf(sequential_merge, "%i"); | 150 | var_printf(sequential_merge, "%i"); |
150 | var_hprint(sequential_cutoff); | 151 | var_hprint(sequential_cutoff); |
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index 553949eefd51..dd815475c524 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c | |||
@@ -9,6 +9,7 @@ | |||
9 | #include "bcache.h" | 9 | #include "bcache.h" |
10 | #include "btree.h" | 10 | #include "btree.h" |
11 | #include "debug.h" | 11 | #include "debug.h" |
12 | #include "writeback.h" | ||
12 | 13 | ||
13 | #include <trace/events/bcache.h> | 14 | #include <trace/events/bcache.h> |
14 | 15 | ||
@@ -38,7 +39,7 @@ static void __update_writeback_rate(struct cached_dev *dc) | |||
38 | 39 | ||
39 | int change = 0; | 40 | int change = 0; |
40 | int64_t error; | 41 | int64_t error; |
41 | int64_t dirty = atomic_long_read(&dc->disk.sectors_dirty); | 42 | int64_t dirty = bcache_dev_sectors_dirty(&dc->disk); |
42 | int64_t derivative = dirty - dc->disk.sectors_dirty_last; | 43 | int64_t derivative = dirty - dc->disk.sectors_dirty_last; |
43 | 44 | ||
44 | dc->disk.sectors_dirty_last = dirty; | 45 | dc->disk.sectors_dirty_last = dirty; |
@@ -183,10 +184,8 @@ void bch_writeback_queue(struct cached_dev *dc) | |||
183 | } | 184 | } |
184 | } | 185 | } |
185 | 186 | ||
186 | void bch_writeback_add(struct cached_dev *dc, unsigned sectors) | 187 | void bch_writeback_add(struct cached_dev *dc) |
187 | { | 188 | { |
188 | atomic_long_add(sectors, &dc->disk.sectors_dirty); | ||
189 | |||
190 | if (!atomic_read(&dc->has_dirty) && | 189 | if (!atomic_read(&dc->has_dirty) && |
191 | !atomic_xchg(&dc->has_dirty, 1)) { | 190 | !atomic_xchg(&dc->has_dirty, 1)) { |
192 | atomic_inc(&dc->count); | 191 | atomic_inc(&dc->count); |
@@ -205,6 +204,34 @@ void bch_writeback_add(struct cached_dev *dc, unsigned sectors) | |||
205 | } | 204 | } |
206 | } | 205 | } |
207 | 206 | ||
207 | void bcache_dev_sectors_dirty_add(struct cache_set *c, unsigned inode, | ||
208 | uint64_t offset, int nr_sectors) | ||
209 | { | ||
210 | struct bcache_device *d = c->devices[inode]; | ||
211 | unsigned stripe_size, stripe_offset; | ||
212 | uint64_t stripe; | ||
213 | |||
214 | if (!d) | ||
215 | return; | ||
216 | |||
217 | stripe_size = 1 << d->stripe_size_bits; | ||
218 | stripe = offset >> d->stripe_size_bits; | ||
219 | stripe_offset = offset & (stripe_size - 1); | ||
220 | |||
221 | while (nr_sectors) { | ||
222 | int s = min_t(unsigned, abs(nr_sectors), | ||
223 | stripe_size - stripe_offset); | ||
224 | |||
225 | if (nr_sectors < 0) | ||
226 | s = -s; | ||
227 | |||
228 | atomic_add(s, d->stripe_sectors_dirty + stripe); | ||
229 | nr_sectors -= s; | ||
230 | stripe_offset = 0; | ||
231 | stripe++; | ||
232 | } | ||
233 | } | ||
234 | |||
208 | /* Background writeback - IO loop */ | 235 | /* Background writeback - IO loop */ |
209 | 236 | ||
210 | static void dirty_io_destructor(struct closure *cl) | 237 | static void dirty_io_destructor(struct closure *cl) |
@@ -392,8 +419,9 @@ static int bch_btree_sectors_dirty_init(struct btree *b, struct btree_op *op, | |||
392 | break; | 419 | break; |
393 | 420 | ||
394 | if (KEY_DIRTY(k)) | 421 | if (KEY_DIRTY(k)) |
395 | atomic_long_add(KEY_SIZE(k), | 422 | bcache_dev_sectors_dirty_add(b->c, dc->disk.id, |
396 | &dc->disk.sectors_dirty); | 423 | KEY_START(k), |
424 | KEY_SIZE(k)); | ||
397 | } else { | 425 | } else { |
398 | btree(sectors_dirty_init, k, b, op, dc); | 426 | btree(sectors_dirty_init, k, b, op, dc); |
399 | if (KEY_INODE(k) > dc->disk.id) | 427 | if (KEY_INODE(k) > dc->disk.id) |
diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h new file mode 100644 index 000000000000..5ce9771df047 --- /dev/null +++ b/drivers/md/bcache/writeback.h | |||
@@ -0,0 +1,21 @@ | |||
1 | #ifndef _BCACHE_WRITEBACK_H | ||
2 | #define _BCACHE_WRITEBACK_H | ||
3 | |||
4 | static inline uint64_t bcache_dev_sectors_dirty(struct bcache_device *d) | ||
5 | { | ||
6 | uint64_t i, ret = 0; | ||
7 | |||
8 | for (i = 0; i < d->nr_stripes; i++) | ||
9 | ret += atomic_read(d->stripe_sectors_dirty + i); | ||
10 | |||
11 | return ret; | ||
12 | } | ||
13 | |||
14 | void bcache_dev_sectors_dirty_add(struct cache_set *, unsigned, uint64_t, int); | ||
15 | void bch_writeback_queue(struct cached_dev *); | ||
16 | void bch_writeback_add(struct cached_dev *); | ||
17 | |||
18 | void bch_sectors_dirty_init(struct cached_dev *dc); | ||
19 | void bch_cached_dev_writeback_init(struct cached_dev *); | ||
20 | |||
21 | #endif | ||