about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Kent Overstreet <koverstreet@google.com>, 2013-06-05 09:21:07 -0400
committer: Kent Overstreet <koverstreet@google.com>, 2013-06-27 00:57:23 -0400
commit: 279afbad4e54acbd61bf88a54a73af3bbfdeb5dd (patch)
tree: aefebaa3a7f54049904a275ca3035301603a9be1
parent: 444fc0b6b167ed164e7436621a9d095e042644dd (diff)
bcache: Track dirty data by stripe
To make background writeback aware of raid5/6 stripes, we first need to track the amount of dirty data within each stripe - we do this by breaking up the existing sectors_dirty into per-stripe atomic_ts.

Signed-off-by: Kent Overstreet <koverstreet@google.com>
-rw-r--r-- drivers/md/bcache/bcache.h 10
-rw-r--r-- drivers/md/bcache/btree.c 20
-rw-r--r-- drivers/md/bcache/request.c 3
-rw-r--r-- drivers/md/bcache/super.c 32
-rw-r--r-- drivers/md/bcache/sysfs.c 5
-rw-r--r-- drivers/md/bcache/writeback.c 40
-rw-r--r-- drivers/md/bcache/writeback.h 21
7 files changed, 105 insertions, 26 deletions
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index d099d8894c2f..dbddef0cdb59 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -437,7 +437,10 @@ struct bcache_device {
437 /* If nonzero, we're detaching/unregistering from cache set */ 437 /* If nonzero, we're detaching/unregistering from cache set */
438 atomic_t detaching; 438 atomic_t detaching;
439 439
440 atomic_long_t sectors_dirty; 440 uint64_t nr_stripes;
441 unsigned stripe_size_bits;
442 atomic_t *stripe_sectors_dirty;
443
441 unsigned long sectors_dirty_last; 444 unsigned long sectors_dirty_last;
442 long sectors_dirty_derivative; 445 long sectors_dirty_derivative;
443 446
@@ -1159,9 +1162,6 @@ static inline void wake_up_allocators(struct cache_set *c)
1159 1162
1160/* Forward declarations */ 1163/* Forward declarations */
1161 1164
1162void bch_writeback_queue(struct cached_dev *);
1163void bch_writeback_add(struct cached_dev *, unsigned);
1164
1165void bch_count_io_errors(struct cache *, int, const char *); 1165void bch_count_io_errors(struct cache *, int, const char *);
1166void bch_bbio_count_io_errors(struct cache_set *, struct bio *, 1166void bch_bbio_count_io_errors(struct cache_set *, struct bio *,
1167 int, const char *); 1167 int, const char *);
@@ -1224,8 +1224,6 @@ void bch_cache_set_stop(struct cache_set *);
1224struct cache_set *bch_cache_set_alloc(struct cache_sb *); 1224struct cache_set *bch_cache_set_alloc(struct cache_sb *);
1225void bch_btree_cache_free(struct cache_set *); 1225void bch_btree_cache_free(struct cache_set *);
1226int bch_btree_cache_alloc(struct cache_set *); 1226int bch_btree_cache_alloc(struct cache_set *);
1227void bch_sectors_dirty_init(struct cached_dev *);
1228void bch_cached_dev_writeback_init(struct cached_dev *);
1229void bch_moving_init_cache_set(struct cache_set *); 1227void bch_moving_init_cache_set(struct cache_set *);
1230 1228
1231int bch_cache_allocator_start(struct cache *ca); 1229int bch_cache_allocator_start(struct cache *ca);
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 230c3a6d9be2..b93cf56260a4 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -24,6 +24,7 @@
24#include "btree.h" 24#include "btree.h"
25#include "debug.h" 25#include "debug.h"
26#include "request.h" 26#include "request.h"
27#include "writeback.h"
27 28
28#include <linux/slab.h> 29#include <linux/slab.h>
29#include <linux/bitops.h> 30#include <linux/bitops.h>
@@ -1599,14 +1600,14 @@ static bool fix_overlapping_extents(struct btree *b,
1599 struct btree_iter *iter, 1600 struct btree_iter *iter,
1600 struct btree_op *op) 1601 struct btree_op *op)
1601{ 1602{
1602 void subtract_dirty(struct bkey *k, int sectors) 1603 void subtract_dirty(struct bkey *k, uint64_t offset, int sectors)
1603 { 1604 {
1604 struct bcache_device *d = b->c->devices[KEY_INODE(k)]; 1605 if (KEY_DIRTY(k))
1605 1606 bcache_dev_sectors_dirty_add(b->c, KEY_INODE(k),
1606 if (KEY_DIRTY(k) && d) 1607 offset, -sectors);
1607 atomic_long_sub(sectors, &d->sectors_dirty);
1608 } 1608 }
1609 1609
1610 uint64_t old_offset;
1610 unsigned old_size, sectors_found = 0; 1611 unsigned old_size, sectors_found = 0;
1611 1612
1612 while (1) { 1613 while (1) {
@@ -1618,6 +1619,7 @@ static bool fix_overlapping_extents(struct btree *b,
1618 if (bkey_cmp(k, &START_KEY(insert)) <= 0) 1619 if (bkey_cmp(k, &START_KEY(insert)) <= 0)
1619 continue; 1620 continue;
1620 1621
1622 old_offset = KEY_START(k);
1621 old_size = KEY_SIZE(k); 1623 old_size = KEY_SIZE(k);
1622 1624
1623 /* 1625 /*
@@ -1673,7 +1675,7 @@ static bool fix_overlapping_extents(struct btree *b,
1673 1675
1674 struct bkey *top; 1676 struct bkey *top;
1675 1677
1676 subtract_dirty(k, KEY_SIZE(insert)); 1678 subtract_dirty(k, KEY_START(insert), KEY_SIZE(insert));
1677 1679
1678 if (bkey_written(b, k)) { 1680 if (bkey_written(b, k)) {
1679 /* 1681 /*
@@ -1720,7 +1722,7 @@ static bool fix_overlapping_extents(struct btree *b,
1720 } 1722 }
1721 } 1723 }
1722 1724
1723 subtract_dirty(k, old_size - KEY_SIZE(k)); 1725 subtract_dirty(k, old_offset, old_size - KEY_SIZE(k));
1724 } 1726 }
1725 1727
1726check_failed: 1728check_failed:
@@ -1796,6 +1798,10 @@ static bool btree_insert_key(struct btree *b, struct btree_op *op,
1796insert: shift_keys(b, m, k); 1798insert: shift_keys(b, m, k);
1797copy: bkey_copy(m, k); 1799copy: bkey_copy(m, k);
1798merged: 1800merged:
1801 if (KEY_DIRTY(k))
1802 bcache_dev_sectors_dirty_add(b->c, KEY_INODE(k),
1803 KEY_START(k), KEY_SIZE(k));
1804
1799 bch_check_keys(b, "%u for %s", status, op_type(op)); 1805 bch_check_keys(b, "%u for %s", status, op_type(op));
1800 1806
1801 if (b->level && !KEY_OFFSET(k)) 1807 if (b->level && !KEY_OFFSET(k))
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 695469958c1e..017c95fced8e 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -10,6 +10,7 @@
10#include "btree.h" 10#include "btree.h"
11#include "debug.h" 11#include "debug.h"
12#include "request.h" 12#include "request.h"
13#include "writeback.h"
13 14
14#include <linux/cgroup.h> 15#include <linux/cgroup.h>
15#include <linux/module.h> 16#include <linux/module.h>
@@ -1044,7 +1045,7 @@ static void request_write(struct cached_dev *dc, struct search *s)
1044 closure_bio_submit(bio, cl, s->d); 1045 closure_bio_submit(bio, cl, s->d);
1045 } else { 1046 } else {
1046 s->op.cache_bio = bio; 1047 s->op.cache_bio = bio;
1047 bch_writeback_add(dc, bio_sectors(bio)); 1048 bch_writeback_add(dc);
1048 } 1049 }
1049out: 1050out:
1050 closure_call(&s->op.cl, bch_insert_data, NULL, cl); 1051 closure_call(&s->op.cl, bch_insert_data, NULL, cl);
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index dbfa1c38e85e..8c73f0c7f28a 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -10,6 +10,7 @@
10#include "btree.h" 10#include "btree.h"
11#include "debug.h" 11#include "debug.h"
12#include "request.h" 12#include "request.h"
13#include "writeback.h"
13 14
14#include <linux/blkdev.h> 15#include <linux/blkdev.h>
15#include <linux/buffer_head.h> 16#include <linux/buffer_head.h>
@@ -744,13 +745,35 @@ static void bcache_device_free(struct bcache_device *d)
744 mempool_destroy(d->unaligned_bvec); 745 mempool_destroy(d->unaligned_bvec);
745 if (d->bio_split) 746 if (d->bio_split)
746 bioset_free(d->bio_split); 747 bioset_free(d->bio_split);
748 if (is_vmalloc_addr(d->stripe_sectors_dirty))
749 vfree(d->stripe_sectors_dirty);
750 else
751 kfree(d->stripe_sectors_dirty);
747 752
748 closure_debug_destroy(&d->cl); 753 closure_debug_destroy(&d->cl);
749} 754}
750 755
751static int bcache_device_init(struct bcache_device *d, unsigned block_size) 756static int bcache_device_init(struct bcache_device *d, unsigned block_size,
757 sector_t sectors)
752{ 758{
753 struct request_queue *q; 759 struct request_queue *q;
760 size_t n;
761
762 if (!d->stripe_size_bits)
763 d->stripe_size_bits = 31;
764
765 d->nr_stripes = round_up(sectors, 1 << d->stripe_size_bits) >>
766 d->stripe_size_bits;
767
768 if (!d->nr_stripes || d->nr_stripes > SIZE_MAX / sizeof(atomic_t))
769 return -ENOMEM;
770
771 n = d->nr_stripes * sizeof(atomic_t);
772 d->stripe_sectors_dirty = n < PAGE_SIZE << 6
773 ? kzalloc(n, GFP_KERNEL)
774 : vzalloc(n);
775 if (!d->stripe_sectors_dirty)
776 return -ENOMEM;
754 777
755 if (!(d->bio_split = bioset_create(4, offsetof(struct bbio, bio))) || 778 if (!(d->bio_split = bioset_create(4, offsetof(struct bbio, bio))) ||
756 !(d->unaligned_bvec = mempool_create_kmalloc_pool(1, 779 !(d->unaligned_bvec = mempool_create_kmalloc_pool(1,
@@ -760,6 +783,7 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size)
760 !(q = blk_alloc_queue(GFP_KERNEL))) 783 !(q = blk_alloc_queue(GFP_KERNEL)))
761 return -ENOMEM; 784 return -ENOMEM;
762 785
786 set_capacity(d->disk, sectors);
763 snprintf(d->disk->disk_name, DISK_NAME_LEN, "bcache%i", bcache_minor); 787 snprintf(d->disk->disk_name, DISK_NAME_LEN, "bcache%i", bcache_minor);
764 788
765 d->disk->major = bcache_major; 789 d->disk->major = bcache_major;
@@ -1047,7 +1071,8 @@ static int cached_dev_init(struct cached_dev *dc, unsigned block_size)
1047 hlist_add_head(&io->hash, dc->io_hash + RECENT_IO); 1071 hlist_add_head(&io->hash, dc->io_hash + RECENT_IO);
1048 } 1072 }
1049 1073
1050 ret = bcache_device_init(&dc->disk, block_size); 1074 ret = bcache_device_init(&dc->disk, block_size,
1075 dc->bdev->bd_part->nr_sects - dc->sb.data_offset);
1051 if (ret) 1076 if (ret)
1052 return ret; 1077 return ret;
1053 1078
@@ -1146,11 +1171,10 @@ static int flash_dev_run(struct cache_set *c, struct uuid_entry *u)
1146 1171
1147 kobject_init(&d->kobj, &bch_flash_dev_ktype); 1172 kobject_init(&d->kobj, &bch_flash_dev_ktype);
1148 1173
1149 if (bcache_device_init(d, block_bytes(c))) 1174 if (bcache_device_init(d, block_bytes(c), u->sectors))
1150 goto err; 1175 goto err;
1151 1176
1152 bcache_device_attach(d, c, u - c->uuids); 1177 bcache_device_attach(d, c, u - c->uuids);
1153 set_capacity(d->disk, u->sectors);
1154 bch_flash_dev_request_init(d); 1178 bch_flash_dev_request_init(d);
1155 add_disk(d->disk); 1179 add_disk(d->disk);
1156 1180
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index f5c2d8695230..cf8d91ec3238 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -9,6 +9,7 @@
9#include "sysfs.h" 9#include "sysfs.h"
10#include "btree.h" 10#include "btree.h"
11#include "request.h" 11#include "request.h"
12#include "writeback.h"
12 13
13#include <linux/blkdev.h> 14#include <linux/blkdev.h>
14#include <linux/sort.h> 15#include <linux/sort.h>
@@ -128,7 +129,7 @@ SHOW(__bch_cached_dev)
128 char derivative[20]; 129 char derivative[20];
129 char target[20]; 130 char target[20];
130 bch_hprint(dirty, 131 bch_hprint(dirty,
131 atomic_long_read(&dc->disk.sectors_dirty) << 9); 132 bcache_dev_sectors_dirty(&dc->disk) << 9);
132 bch_hprint(derivative, dc->writeback_rate_derivative << 9); 133 bch_hprint(derivative, dc->writeback_rate_derivative << 9);
133 bch_hprint(target, dc->writeback_rate_target << 9); 134 bch_hprint(target, dc->writeback_rate_target << 9);
134 135
@@ -144,7 +145,7 @@ SHOW(__bch_cached_dev)
144 } 145 }
145 146
146 sysfs_hprint(dirty_data, 147 sysfs_hprint(dirty_data,
147 atomic_long_read(&dc->disk.sectors_dirty) << 9); 148 bcache_dev_sectors_dirty(&dc->disk) << 9);
148 149
149 var_printf(sequential_merge, "%i"); 150 var_printf(sequential_merge, "%i");
150 var_hprint(sequential_cutoff); 151 var_hprint(sequential_cutoff);
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 553949eefd51..dd815475c524 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -9,6 +9,7 @@
9#include "bcache.h" 9#include "bcache.h"
10#include "btree.h" 10#include "btree.h"
11#include "debug.h" 11#include "debug.h"
12#include "writeback.h"
12 13
13#include <trace/events/bcache.h> 14#include <trace/events/bcache.h>
14 15
@@ -38,7 +39,7 @@ static void __update_writeback_rate(struct cached_dev *dc)
38 39
39 int change = 0; 40 int change = 0;
40 int64_t error; 41 int64_t error;
41 int64_t dirty = atomic_long_read(&dc->disk.sectors_dirty); 42 int64_t dirty = bcache_dev_sectors_dirty(&dc->disk);
42 int64_t derivative = dirty - dc->disk.sectors_dirty_last; 43 int64_t derivative = dirty - dc->disk.sectors_dirty_last;
43 44
44 dc->disk.sectors_dirty_last = dirty; 45 dc->disk.sectors_dirty_last = dirty;
@@ -183,10 +184,8 @@ void bch_writeback_queue(struct cached_dev *dc)
183 } 184 }
184} 185}
185 186
186void bch_writeback_add(struct cached_dev *dc, unsigned sectors) 187void bch_writeback_add(struct cached_dev *dc)
187{ 188{
188 atomic_long_add(sectors, &dc->disk.sectors_dirty);
189
190 if (!atomic_read(&dc->has_dirty) && 189 if (!atomic_read(&dc->has_dirty) &&
191 !atomic_xchg(&dc->has_dirty, 1)) { 190 !atomic_xchg(&dc->has_dirty, 1)) {
192 atomic_inc(&dc->count); 191 atomic_inc(&dc->count);
@@ -205,6 +204,34 @@ void bch_writeback_add(struct cached_dev *dc, unsigned sectors)
205 } 204 }
206} 205}
207 206
207void bcache_dev_sectors_dirty_add(struct cache_set *c, unsigned inode,
208 uint64_t offset, int nr_sectors)
209{
210 struct bcache_device *d = c->devices[inode];
211 unsigned stripe_size, stripe_offset;
212 uint64_t stripe;
213
214 if (!d)
215 return;
216
217 stripe_size = 1 << d->stripe_size_bits;
218 stripe = offset >> d->stripe_size_bits;
219 stripe_offset = offset & (stripe_size - 1);
220
221 while (nr_sectors) {
222 int s = min_t(unsigned, abs(nr_sectors),
223 stripe_size - stripe_offset);
224
225 if (nr_sectors < 0)
226 s = -s;
227
228 atomic_add(s, d->stripe_sectors_dirty + stripe);
229 nr_sectors -= s;
230 stripe_offset = 0;
231 stripe++;
232 }
233}
234
208/* Background writeback - IO loop */ 235/* Background writeback - IO loop */
209 236
210static void dirty_io_destructor(struct closure *cl) 237static void dirty_io_destructor(struct closure *cl)
@@ -392,8 +419,9 @@ static int bch_btree_sectors_dirty_init(struct btree *b, struct btree_op *op,
392 break; 419 break;
393 420
394 if (KEY_DIRTY(k)) 421 if (KEY_DIRTY(k))
395 atomic_long_add(KEY_SIZE(k), 422 bcache_dev_sectors_dirty_add(b->c, dc->disk.id,
396 &dc->disk.sectors_dirty); 423 KEY_START(k),
424 KEY_SIZE(k));
397 } else { 425 } else {
398 btree(sectors_dirty_init, k, b, op, dc); 426 btree(sectors_dirty_init, k, b, op, dc);
399 if (KEY_INODE(k) > dc->disk.id) 427 if (KEY_INODE(k) > dc->disk.id)
diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h
new file mode 100644
index 000000000000..5ce9771df047
--- /dev/null
+++ b/drivers/md/bcache/writeback.h
@@ -0,0 +1,21 @@
1#ifndef _BCACHE_WRITEBACK_H
2#define _BCACHE_WRITEBACK_H
3
4static inline uint64_t bcache_dev_sectors_dirty(struct bcache_device *d)
5{
6 uint64_t i, ret = 0;
7
8 for (i = 0; i < d->nr_stripes; i++)
9 ret += atomic_read(d->stripe_sectors_dirty + i);
10
11 return ret;
12}
13
14void bcache_dev_sectors_dirty_add(struct cache_set *, unsigned, uint64_t, int);
15void bch_writeback_queue(struct cached_dev *);
16void bch_writeback_add(struct cached_dev *);
17
18void bch_sectors_dirty_init(struct cached_dev *dc);
19void bch_cached_dev_writeback_init(struct cached_dev *);
20
21#endif